qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
@ 2016-09-09  4:23 Nikunj A Dadhania
  2016-09-09  4:30 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 7+ messages in thread
From: Nikunj A Dadhania @ 2016-09-09  4:23 UTC (permalink / raw)
  To: qemu-ppc, david, benh; +Cc: alex.bennee, qemu-devel, rth, nikunj

tlbie (and H_REMOVE for pseries) should have a global effect. This is
achieved by iterating and setting tlb_need_flush in all the CPUs.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

--

Note: Haven't changed following POWERPC_MMU_32B and POWERPC_MMU_601 yet.
As I am not sure about it.
---
 target-ppc/cpu.h         |  3 ++-
 target-ppc/helper.h      |  1 +
 target-ppc/helper_regs.h | 13 +++++++++----
 target-ppc/mmu-hash64.c  | 16 ++++++++++------
 target-ppc/mmu_helper.c  | 29 +++++++++++++++++++++++------
 target-ppc/translate.c   |  2 +-
 6 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 1e808c8..59889f8 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1251,7 +1251,8 @@ void store_40x_sler (CPUPPCState *env, uint32_t val);
 void store_booke_tcr (CPUPPCState *env, target_ulong val);
 void store_booke_tsr (CPUPPCState *env, target_ulong val);
 void ppc_tlb_invalidate_all (CPUPPCState *env);
-void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
+void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr,
+                            unsigned int global);
 void cpu_ppc_set_papr(PowerPCCPU *cpu);
 #endif
 #endif
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index dcf3f95..79fb688 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -561,6 +561,7 @@ DEF_HELPER_2(74xx_tlbd, void, env, tl)
 DEF_HELPER_2(74xx_tlbi, void, env, tl)
 DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(tlbiel, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 3d279f1..f3eb21d 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -156,10 +156,15 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
 #if !defined(CONFIG_USER_ONLY)
 static inline void check_tlb_flush(CPUPPCState *env)
 {
-    CPUState *cs = CPU(ppc_env_get_cpu(env));
-    if (env->tlb_need_flush) {
-        env->tlb_need_flush = 0;
-        tlb_flush(cs, 1);
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *env = &cpu->env;
+        if (env->tlb_need_flush) {
+            env->tlb_need_flush = 0;
+            tlb_flush(cs, 1);
+        }
     }
 }
 #else
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 8118143..a76c92b 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -907,12 +907,16 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
                                target_ulong pte_index,
                                target_ulong pte0, target_ulong pte1)
 {
-    /*
-     * XXX: given the fact that there are too many segments to
-     * invalidate, and we still don't have a tlb_flush_mask(env, n,
-     * mask) in QEMU, we just invalidate all TLBs
-     */
-    tlb_flush(CPU(cpu), 1);
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        /*
+         * XXX: given the fact that there are too many segments to
+         * invalidate, and we still don't have a tlb_flush_mask(env, n,
+         * mask) in QEMU, we just invalidate all TLBs
+         */
+        tlb_flush(cs, 1);
+    }
 }
 
 void ppc_hash64_update_rmls(CPUPPCState *env)
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 696bb03..1923f1b 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1946,7 +1946,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
     }
 }
 
-void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
+void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr,
+                            unsigned int global)
 {
 #if !defined(FLUSH_ALL_TLBS)
     addr &= TARGET_PAGE_MASK;
@@ -1979,7 +1980,14 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
          *      we just invalidate all TLBs
          */
-        env->tlb_need_flush = 1;
+        if (!global) {
+            env->tlb_need_flush = 1;
+        } else {
+            CPUState *other_cs;
+            CPU_FOREACH(other_cs) {
+                env->tlb_need_flush = 1;
+            }
+        }
         break;
 #endif /* defined(TARGET_PPC64) */
     default:
@@ -2078,7 +2086,12 @@ void helper_tlbia(CPUPPCState *env)
 
 void helper_tlbie(CPUPPCState *env, target_ulong addr)
 {
-    ppc_tlb_invalidate_one(env, addr);
+    ppc_tlb_invalidate_one(env, addr, 1);
+}
+
+void helper_tlbiel(CPUPPCState *env, target_ulong addr)
+{
+    ppc_tlb_invalidate_one(env, addr, 0);
 }
 
 void helper_tlbiva(CPUPPCState *env, target_ulong addr)
@@ -2757,7 +2770,7 @@ static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
 
 void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    CPUState *cs;
 
     if (address & 0x4) {
         /* flush all entries */
@@ -2774,11 +2787,15 @@ void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
     if (address & 0x8) {
         /* flush TLB1 entries */
         booke206_invalidate_ea_tlb(env, 1, address);
-        tlb_flush(CPU(cpu), 1);
+        CPU_FOREACH(cs) {
+            tlb_flush(cs, 1);
+        }
     } else {
         /* flush TLB0 entries */
         booke206_invalidate_ea_tlb(env, 0, address);
-        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
+        CPU_FOREACH(cs) {
+            tlb_flush_page(cs, address & MAS2_EPN_MASK);
+        }
     }
 }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 618334a..86a8fb6 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4432,7 +4432,7 @@ static void gen_tlbiel(DisasContext *ctx)
 #else
     CHK_SV;
 
-    gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
+    gen_helper_tlbiel(cpu_env, cpu_gpr[rB(ctx->opcode)]);
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
  2016-09-09  4:23 [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect Nikunj A Dadhania
@ 2016-09-09  4:30 ` Benjamin Herrenschmidt
  2016-09-09  4:45   ` Nikunj A Dadhania
  0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2016-09-09  4:30 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: alex.bennee, qemu-devel, rth

On Fri, 2016-09-09 at 09:53 +0530, Nikunj A Dadhania wrote:
> tlbie (and H_REMOVE for pseries) should have a global effect. This is
> achieved by iterating and setting tlb_need_flush in all the CPUs.
> 
> Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> 
> --
> 
> Note: Haven't changed following POWERPC_MMU_32B and POWERPC_MMU_601
> yet.
> As I am not sure about it.

604 and 7400 can do SMP.

That said, I think the approach in your patch is going to be a bit big
of a hammer. We should have a separate flag indicating that we need to
broadcast a flush and only set it on tlbie (non-l) and tlbivax (on
BookE) so that we don't end up doing expensive broadcasts on things
like context switches.

We keep the existing logic to flush locally. We additionally replace
the one in ptesync (BookS) or tlbsync (BookE) to test for the broadcast
flag, and flush the "other" CPUs if set.

That also means you have a nice spot to do the more complex MT-TCG
broadcast only when needed in the future.

Cheers,
Ben.

> ---
>  target-ppc/cpu.h         |  3 ++-
>  target-ppc/helper.h      |  1 +
>  target-ppc/helper_regs.h | 13 +++++++++----
>  target-ppc/mmu-hash64.c  | 16 ++++++++++------
>  target-ppc/mmu_helper.c  | 29 +++++++++++++++++++++++------
>  target-ppc/translate.c   |  2 +-
>  6 files changed, 46 insertions(+), 18 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 1e808c8..59889f8 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1251,7 +1251,8 @@ void store_40x_sler (CPUPPCState *env, uint32_t
> val);
>  void store_booke_tcr (CPUPPCState *env, target_ulong val);
>  void store_booke_tsr (CPUPPCState *env, target_ulong val);
>  void ppc_tlb_invalidate_all (CPUPPCState *env);
> -void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
> +void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr,
> +                            unsigned int global);
>  void cpu_ppc_set_papr(PowerPCCPU *cpu);
>  #endif
>  #endif
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index dcf3f95..79fb688 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -561,6 +561,7 @@ DEF_HELPER_2(74xx_tlbd, void, env, tl)
>  DEF_HELPER_2(74xx_tlbi, void, env, tl)
>  DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
>  DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
> +DEF_HELPER_FLAGS_2(tlbiel, TCG_CALL_NO_RWG, void, env, tl)
>  DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
>  #if defined(TARGET_PPC64)
>  DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 3d279f1..f3eb21d 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -156,10 +156,15 @@ static inline int hreg_store_msr(CPUPPCState
> *env, target_ulong value,
>  #if !defined(CONFIG_USER_ONLY)
>  static inline void check_tlb_flush(CPUPPCState *env)
>  {
> -    CPUState *cs = CPU(ppc_env_get_cpu(env));
> -    if (env->tlb_need_flush) {
> -        env->tlb_need_flush = 0;
> -        tlb_flush(cs, 1);
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        CPUPPCState *env = &cpu->env;
> +        if (env->tlb_need_flush) {
> +            env->tlb_need_flush = 0;
> +            tlb_flush(cs, 1);
> +        }
>      }
>  }
>  #else
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 8118143..a76c92b 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -907,12 +907,16 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
>                                 target_ulong pte_index,
>                                 target_ulong pte0, target_ulong pte1)
>  {
> -    /*
> -     * XXX: given the fact that there are too many segments to
> -     * invalidate, and we still don't have a tlb_flush_mask(env, n,
> -     * mask) in QEMU, we just invalidate all TLBs
> -     */
> -    tlb_flush(CPU(cpu), 1);
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        /*
> +         * XXX: given the fact that there are too many segments to
> +         * invalidate, and we still don't have a tlb_flush_mask(env,
> n,
> +         * mask) in QEMU, we just invalidate all TLBs
> +         */
> +        tlb_flush(cs, 1);
> +    }
>  }
>  
>  void ppc_hash64_update_rmls(CPUPPCState *env)
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index 696bb03..1923f1b 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -1946,7 +1946,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>      }
>  }
>  
> -void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
> +void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr,
> +                            unsigned int global)
>  {
>  #if !defined(FLUSH_ALL_TLBS)
>      addr &= TARGET_PAGE_MASK;
> @@ -1979,7 +1980,14 @@ void ppc_tlb_invalidate_one(CPUPPCState *env,
> target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n,
> mask) in QEMU,
>           *      we just invalidate all TLBs
>           */
> -        env->tlb_need_flush = 1;
> +        if (!global) {
> +            env->tlb_need_flush = 1;
> +        } else {
> +            CPUState *other_cs;
> +            CPU_FOREACH(other_cs) {
> +                env->tlb_need_flush = 1;
> +            }
> +        }
>          break;
>  #endif /* defined(TARGET_PPC64) */
>      default:
> @@ -2078,7 +2086,12 @@ void helper_tlbia(CPUPPCState *env)
>  
>  void helper_tlbie(CPUPPCState *env, target_ulong addr)
>  {
> -    ppc_tlb_invalidate_one(env, addr);
> +    ppc_tlb_invalidate_one(env, addr, 1);
> +}
> +
> +void helper_tlbiel(CPUPPCState *env, target_ulong addr)
> +{
> +    ppc_tlb_invalidate_one(env, addr, 0);
>  }
>  
>  void helper_tlbiva(CPUPPCState *env, target_ulong addr)
> @@ -2757,7 +2770,7 @@ static inline void
> booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn,
>  
>  void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> +    CPUState *cs;
>  
>      if (address & 0x4) {
>          /* flush all entries */
> @@ -2774,11 +2787,15 @@ void helper_booke206_tlbivax(CPUPPCState
> *env, target_ulong address)
>      if (address & 0x8) {
>          /* flush TLB1 entries */
>          booke206_invalidate_ea_tlb(env, 1, address);
> -        tlb_flush(CPU(cpu), 1);
> +        CPU_FOREACH(cs) {
> +            tlb_flush(cs, 1);
> +        }
>      } else {
>          /* flush TLB0 entries */
>          booke206_invalidate_ea_tlb(env, 0, address);
> -        tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK);
> +        CPU_FOREACH(cs) {
> +            tlb_flush_page(cs, address & MAS2_EPN_MASK);
> +        }
>      }
>  }
>  
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 618334a..86a8fb6 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4432,7 +4432,7 @@ static void gen_tlbiel(DisasContext *ctx)
>  #else
>      CHK_SV;
>  
> -    gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
> +    gen_helper_tlbiel(cpu_env, cpu_gpr[rB(ctx->opcode)]);
>  #endif /* defined(CONFIG_USER_ONLY) */
>  }
>  

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
  2016-09-09  4:30 ` Benjamin Herrenschmidt
@ 2016-09-09  4:45   ` Nikunj A Dadhania
  2016-09-09  5:00     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 7+ messages in thread
From: Nikunj A Dadhania @ 2016-09-09  4:45 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc, david; +Cc: alex.bennee, qemu-devel, rth

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Fri, 2016-09-09 at 09:53 +0530, Nikunj A Dadhania wrote:
>> tlbie (and H_REMOVE for pseries) should have a global effect. This is
>> achieved by iterating and setting tlb_need_flush in all the CPUs.
>> 
>> Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> 
>> --
>> 
>> Note: Haven't changed following POWERPC_MMU_32B and POWERPC_MMU_601
>> yet.
>> As I am not sure about it.
>
> 604 and 7400 can do SMP.

Sure, will add similar logic there.

> That said, I think the approach in your patch is going to be a bit big
> of a hammer.

> We should have a separate flag indicating that we need to
> broadcast a flush and only set it on tlbie (non-l) and tlbivax (on
> BookE) so that we don't end up doing expensive broadcasts on things
> like context switches.

I had implemented that initially, and was checking that in
check_tlb_flush, the logic was getting complicated, so thought about
this way.

Moreover, I was thinking about it, that needs to be a global tcg flag
and not part the CPUPPCState structure, I was then worried about how to
synchronise a global tcg variable.

> We keep the existing logic to flush locally. We additionally replace
> the one in ptesync (BookS) or tlbsync (BookE) to test for the broadcast
> flag, and flush the "other" CPUs if set.
>
> That also means you have a nice spot to do the more complex MT-TCG
> broadcast only when needed in the future.
>

Regards
Nikunj

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
  2016-09-09  4:45   ` Nikunj A Dadhania
@ 2016-09-09  5:00     ` Benjamin Herrenschmidt
  2016-09-09  5:03       ` Benjamin Herrenschmidt
  2016-09-09  5:08       ` Nikunj A Dadhania
  0 siblings, 2 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2016-09-09  5:00 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: alex.bennee, qemu-devel, rth

On Fri, 2016-09-09 at 10:15 +0530, Nikunj A Dadhania wrote:
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> 
> > 
> > On Fri, 2016-09-09 at 09:53 +0530, Nikunj A Dadhania wrote:
> > > 
> > > tlbie (and H_REMOVE for pseries) should have a global effect. This is
> > > achieved by iterating and setting tlb_need_flush in all the CPUs.
> > > 
> > > > > > Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > > > > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> > > 
> > > --
> > > 
> > > Note: Haven't changed following POWERPC_MMU_32B and POWERPC_MMU_601
> > > yet.
> > > As I am not sure about it.
> > 
> > 604 and 7400 can do SMP.
> 
> Sure, will add similar logic there.
> 
> > 
> > That said, I think the approach in your patch is going to be a bit big
> > of a hammer.
> 
> > 
> > We should have a separate flag indicating that we need to
> > broadcast a flush and only set it on tlbie (non-l) and tlbivax (on
> > BookE) so that we don't end up doing expensive broadcasts on things
> > like context switches.
> 
> I had implemented that initially, and was checking that in
> check_tlb_flush, the logic was getting complicated, so thought about
> this way.
> 
> Moreover, I was thinking about it, that needs to be a global tcg flag
> and not part the CPUPPCState structure, I was then worried about how to
> synchronise a global tcg variable.

No it doesn't.

When a "broadcast TLB" op happens, such as tlbie, you set both flags.
The existing one which just means the current CPU needs flushing, that
logic doesnt' change at all.

The other one means that *this* CPU needs to broadcast a TLB inval,
thus it's also a local flag !

Then you change gen_check_tlb_flush() to take an argument (an immediate
value) which you set to 1 in ptesync on BookS and tlbsync on BookE and
leave 0 on all the others.

Finally, gen_check_tlb_flush() does what it does today, then *additionally*
if that argument was 1 *and* the broadcast_flush flag was set, the helper
can then shoot the invalidations to all the other CPUs (and clear the flag).

	CPU_FOREACH(cs) {
		if (cs != this_cpu) {
			tlb_flush(...)
		}
	}


For MT-TCG, the only change is then in that the above loop does async
procesdure calls to the target CPUs.

You will need to make sure ptesync/tlbsync (the latter only on BookE, make
it a nop on BookS) also exit the TB which is easy to do so that we get
the synchronization with the pending async works as Paolo mentioned separately
(double check with him and/or Alex ot make sure you get that right).

> > We keep the existing logic to flush locally. We additionally replace
> > the one in ptesync (BookS) or tlbsync (BookE) to test for the broadcast
> > flag, and flush the "other" CPUs if set.
> > 
> > That also means you have a nice spot to do the more complex MT-TCG
> > broadcast only when needed in the future.
> > 
> 
> Regards
> Nikunj

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
  2016-09-09  5:00     ` Benjamin Herrenschmidt
@ 2016-09-09  5:03       ` Benjamin Herrenschmidt
  2016-09-09  5:08       ` Nikunj A Dadhania
  1 sibling, 0 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2016-09-09  5:03 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: alex.bennee, qemu-devel, rth

On Fri, 2016-09-09 at 15:00 +1000, Benjamin Herrenschmidt wrote:
> 
> No it doesn't.
> 
> When a "broadcast TLB" op happens, such as tlbie, you set both flags.
> The existing one which just means the current CPU needs flushing, that
> logic doesnt' change at all.
> 
> The other one means that *this* CPU needs to broadcast a TLB inval,
> thus it's also a local flag !
> 
> Then you change gen_check_tlb_flush() to take an argument (an immediate
> value) which you set to 1 in ptesync on BookS and tlbsync on BookE and
> leave 0 on all the others.

One subtle thing here: gen_check_tlb_flush now needs to check both
flags. Easy enough to OR them before the brcond. Another option is
to make the flag a bit mask.

Because the "lcoal" need flush might get cleared (by an isync or an
interrupt for example) before we hit the ptesync that will do the
broadcast.
 
> Finally, gen_check_tlb_flush() does what it does today, then *additionally*
> if that argument was 1 *and* the broadcast_flush flag was set, the helper
> can then shoot the invalidations to all the other CPUs (and clear the flag).
> 
> 	CPU_FOREACH(cs) {
> > 		if (cs != this_cpu) {
> > 			tlb_flush(...)
> > 		}
> 	}
> 
> 
> For MT-TCG, the only change is then in that the above loop does async
> procesdure calls to the target CPUs.
> 
> You will need to make sure ptesync/tlbsync (the latter only on BookE, make
> it a nop on BookS) also exit the TB which is easy to do so that we get
> the synchronization with the pending async works as Paolo mentioned separately
> (double check with him and/or Alex ot make sure you get that right).
> 
> > 
> > > 
> > > We keep the existing logic to flush locally. We additionally replace
> > > the one in ptesync (BookS) or tlbsync (BookE) to test for the broadcast
> > > flag, and flush the "other" CPUs if set.
> > > 
> > > That also means you have a nice spot to do the more complex MT-TCG
> > > broadcast only when needed in the future.
> > > 
> > 
> > Regards
> > Nikunj

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
  2016-09-09  5:00     ` Benjamin Herrenschmidt
  2016-09-09  5:03       ` Benjamin Herrenschmidt
@ 2016-09-09  5:08       ` Nikunj A Dadhania
  2016-09-09  5:49         ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 7+ messages in thread
From: Nikunj A Dadhania @ 2016-09-09  5:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc, david; +Cc: alex.bennee, qemu-devel, rth

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Fri, 2016-09-09 at 10:15 +0530, Nikunj A Dadhania wrote:
>> > Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
>> 
>> > 
>> > On Fri, 2016-09-09 at 09:53 +0530, Nikunj A Dadhania wrote:
>> > > 
>> > > tlbie (and H_REMOVE for pseries) should have a global effect. This is
>> > > achieved by iterating and setting tlb_need_flush in all the CPUs.
>> > > 
>> > > > > > Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> > > > > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> > > 
>> > > --
>> > > 
>> > > Note: Haven't changed following POWERPC_MMU_32B and POWERPC_MMU_601
>> > > yet.
>> > > As I am not sure about it.
>> > 
>> > 604 and 7400 can do SMP.
>> 
>> Sure, will add similar logic there.
>> 
>> > 
>> > That said, I think the approach in your patch is going to be a bit big
>> > of a hammer.
>> 
>> > 
>> > We should have a separate flag indicating that we need to
>> > broadcast a flush and only set it on tlbie (non-l) and tlbivax (on
>> > BookE) so that we don't end up doing expensive broadcasts on things
>> > like context switches.
>> 
>> I had implemented that initially, and was checking that in
>> check_tlb_flush, the logic was getting complicated, so thought about
>> this way.
>> 
>> Moreover, I was thinking about it, that needs to be a global tcg flag
>> and not part the CPUPPCState structure, I was then worried about how to
>> synchronise a global tcg variable.
>
> No it doesn't.
>
> When a "broadcast TLB" op happens, such as tlbie, you set both flags.
> The existing one which just means the current CPU needs flushing, that
> logic doesnt' change at all.
>
> The other one means that *this* CPU needs to broadcast a TLB inval,
> thus it's also a local flag !
>
> Then you change gen_check_tlb_flush() to take an argument (an immediate
> value) which you set to 1 in ptesync on BookS and tlbsync on BookE and
> leave 0 on all the others.

One more question, when in gen_check_tlb_flush, don't I need to see if
other CPU has global flag set ?

Regards,
Nikunj

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect
  2016-09-09  5:08       ` Nikunj A Dadhania
@ 2016-09-09  5:49         ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2016-09-09  5:49 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: alex.bennee, qemu-devel, rth

On Fri, 2016-09-09 at 10:38 +0530, Nikunj A Dadhania wrote:
> One more question, when in gen_check_tlb_flush, don't I need to see
> if other CPU has global flag set ?

No, you leave it completely alone.

You can clear it's local flag as part of the flush (in the MT-TCG case
that can be done by the async handler) because if it had a "dirty" TLB
there is no point in it flushing it twice, but the global flag stays.

If the whole operation was completely synchronous, I suppose you could
clear the other's guy global flag, but it's not so I'd rather not take
chances for now.

Ben.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-09-09  5:50 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-09-09  4:23 [Qemu-devel] [PATCH RFC] target-ppc: tlbie should have global effect Nikunj A Dadhania
2016-09-09  4:30 ` Benjamin Herrenschmidt
2016-09-09  4:45   ` Nikunj A Dadhania
2016-09-09  5:00     ` Benjamin Herrenschmidt
2016-09-09  5:03       ` Benjamin Herrenschmidt
2016-09-09  5:08       ` Nikunj A Dadhania
2016-09-09  5:49         ` Benjamin Herrenschmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).