linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [RFC] [PATCH] Trace TLBIE's
@ 2016-11-23  8:06 Balbir Singh
  2016-11-23 10:15 ` Michael Ellerman
  0 siblings, 1 reply; 6+ messages in thread
From: Balbir Singh @ 2016-11-23  8:06 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: Paul Mackerras, Nicholas Piggin


Just a quick patch to trace tlbie(l)'s. The idea being that it can be
enabled when we suspect corruption or when we need to see if we are doing
the right thing during flush. I think the format can be enhanced to
make it nicer (expand the RB/RS/IS/L cases in more detail). For now I am
sharing the idea to get inputs

A typical trace might look like this


<...>-5141  [062]  1354.486693: tlbie:                
	tlbie with lpid 0, local 0, rb=7b5d0ff874f11f1, rs=0, ric=0 prs=0 r=0
systemd-udevd-2584  [018]  1354.486772: tlbie:
	tlbie with lpid 0, local 0, rb=17be1f421adc10c1, rs=0, ric=0 prs=0 r=0
...

qemu-system-ppc-5371  [016]  1412.369519: tlbie:
	tlbie with lpid 0, local 1, rb=67bd8900174c11c1, rs=0, ric=0 prs=0 r=0
qemu-system-ppc-5377  [056]  1421.687262: tlbie:
	tlbie with lpid 1, local 0, rb=5f04edffa00c11c1, rs=1, ric=0 prs=0 r=0


Signed-off-by: Balbir Singh <bsingharora@gmail.com>


diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 32e36b1..b4e02ba 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -168,6 +168,38 @@ TRACE_EVENT(hash_fault,
 		      __entry->addr, __entry->access, __entry->trap)
 );
 
+
+TRACE_EVENT(tlbie,
+
+	TP_PROTO(unsigned long lpid, unsigned long local, unsigned long rb,
+		unsigned long rs, unsigned long ric, unsigned long prs,
+		unsigned long r),
+	TP_ARGS(lpid, local, rb, rs, ric, prs, r),
+	TP_STRUCT__entry(
+		__field(unsigned long, lpid)
+		__field(unsigned long, local)
+		__field(unsigned long, rb)
+		__field(unsigned long, rs)
+		__field(unsigned long, ric)
+		__field(unsigned long, prs)
+		__field(unsigned long, r)
+		),
+
+	TP_fast_assign(
+		__entry->lpid = lpid;
+		__entry->local = local;
+		__entry->rb = rb;
+		__entry->rs = rs;
+		__entry->ric = ric;
+		__entry->prs = prs;
+		__entry->r = r;
+		),
+
+	TP_printk("lpid=%ld, local=%ld, rb=0x%lx, rs=0x%lx, ric=0x%lx, "
+		"prs=0x%lx, r=0x%lx", __entry->lpid, __entry->local, __entry->rb,
+		__entry->rs, __entry->ric, __entry->prs, __entry->r)
+);
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 99b4e9d..5b032e9 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -15,6 +15,7 @@
 #include <linux/log2.h>
 
 #include <asm/tlbflush.h>
+#include <asm/trace.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/book3s/64/mmu-hash.h>
@@ -414,16 +415,22 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 			cpu_relax();
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i)
+		for (i = 0; i < npages; ++i) {
 			asm volatile(PPC_TLBIE(%1,%0) : :
 				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+			trace_tlbie(kvm->arch.lpid, 0, rbvalues[i],
+				kvm->arch.lpid, 0, 0, 0);
+		}
 		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 		kvm->arch.tlbie_lock = 0;
 	} else {
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i)
+		for (i = 0; i < npages; ++i) {
 			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+			trace_tlbie(kvm->arch.lpid, 1, rbvalues[i],
+				0, 0, 0, 0);
+		}
 		asm volatile("ptesync" : : : "memory");
 	}
 }
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 9d9b3ef..a31ff2b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -23,6 +23,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/trace.h>
 #include <asm/tlb.h>
 #include <asm/cputable.h>
 #include <asm/udbg.h>
@@ -98,6 +99,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 			     : "memory");
 		break;
 	}
+	trace_tlbie(0, 0, va, 0, 0, 0, 0);
 }
 
 static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -147,6 +149,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 			     : "memory");
 		break;
 	}
+	trace_tlbie(0, 1, va, 0, 0, 0, 0);
 
 }
 
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index bda8c43..933b373 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -16,6 +16,7 @@
 
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/trace.h>
 
 static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
@@ -38,6 +39,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("ptesync": : :"memory");
+	trace_tlbie(0, 1, rb, rs, ric, prs, r);
 }
 
 /*
@@ -66,6 +68,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
 static inline void _tlbiel_va(unsigned long va, unsigned long pid,
@@ -83,6 +86,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("ptesync": : :"memory");
+	trace_tlbie(0, 1, rb, rs, ric, prs, r);
 }
 
 static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -100,6 +104,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
 /*
@@ -374,6 +379,7 @@ void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
 
@@ -391,6 +397,7 @@ void radix__flush_tlb_lpid(unsigned long lpid)
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 EXPORT_SYMBOL(radix__flush_tlb_lpid);
 
@@ -417,10 +424,12 @@ void radix__flush_tlb_all(void)
 	 */
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 	/*
 	 * now flush host entires by passing PRS = 0 and LPID == 0
 	 */
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+	trace_tlbie(0, 0, rb, 0, ric, prs, r);
 }

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC] [PATCH] Trace TLBIE's
  2016-11-23  8:06 [RFC] [PATCH] Trace TLBIE's Balbir Singh
@ 2016-11-23 10:15 ` Michael Ellerman
  2016-11-23 11:40   ` Balbir Singh
  0 siblings, 1 reply; 6+ messages in thread
From: Michael Ellerman @ 2016-11-23 10:15 UTC (permalink / raw)
  To: Balbir Singh, linuxppc-dev; +Cc: Paul Mackerras, Nicholas Piggin

Balbir Singh <bsingharora@gmail.com> writes:

> Just a quick patch to trace tlbie(l)'s. The idea being that it can be
> enabled when we suspect corruption or when we need to see if we are doing
> the right thing during flush. I think the format can be enhanced to
> make it nicer (expand the RB/RS/IS/L cases in more detail). For now I am
> sharing the idea to get inputs
>
> A typical trace might look like this
>
>
> <...>-5141  [062]  1354.486693: tlbie:                
> 	tlbie with lpid 0, local 0, rb=7b5d0ff874f11f1, rs=0, ric=0 prs=0 r=0
> systemd-udevd-2584  [018]  1354.486772: tlbie:
> 	tlbie with lpid 0, local 0, rb=17be1f421adc10c1, rs=0, ric=0 prs=0 r=0
> ...
>
> qemu-system-ppc-5371  [016]  1412.369519: tlbie:
> 	tlbie with lpid 0, local 1, rb=67bd8900174c11c1, rs=0, ric=0 prs=0 r=0
> qemu-system-ppc-5377  [056]  1421.687262: tlbie:
> 	tlbie with lpid 1, local 0, rb=5f04edffa00c11c1, rs=1, ric=0 prs=0 r=0

My first reaction is "why the hell do we have so many open-coded
tlbies". So step one might be to add a static inline helper, that way we
don't have to add the trace_tlbie() in so many places.

Also in some of them you call trace_tlbie() before the
eieio/tlbsync/ptesync. Which may not be wrong, but looks worrying at
first glance.

But overall I guess it's OK. We'd want to do a quick benchmark to make
sure it's not adding any overhead.

cheers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] [PATCH] Trace TLBIE's
  2016-11-23 10:15 ` Michael Ellerman
@ 2016-11-23 11:40   ` Balbir Singh
  2017-04-06  5:32     ` Alistair Popple
  0 siblings, 1 reply; 6+ messages in thread
From: Balbir Singh @ 2016-11-23 11:40 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: Paul Mackerras, Nicholas Piggin



On 23/11/16 21:15, Michael Ellerman wrote:
> Balbir Singh <bsingharora@gmail.com> writes:
> 
>> Just a quick patch to trace tlbie(l)'s. The idea being that it can be
>> enabled when we suspect corruption or when we need to see if we are doing
>> the right thing during flush. I think the format can be enhanced to
>> make it nicer (expand the RB/RS/IS/L cases in more detail). For now I am
>> sharing the idea to get inputs
>>
>> A typical trace might look like this
>>
>>
>> <...>-5141  [062]  1354.486693: tlbie:                
>> 	tlbie with lpid 0, local 0, rb=7b5d0ff874f11f1, rs=0, ric=0 prs=0 r=0
>> systemd-udevd-2584  [018]  1354.486772: tlbie:
>> 	tlbie with lpid 0, local 0, rb=17be1f421adc10c1, rs=0, ric=0 prs=0 r=0
>> ...
>>
>> qemu-system-ppc-5371  [016]  1412.369519: tlbie:
>> 	tlbie with lpid 0, local 1, rb=67bd8900174c11c1, rs=0, ric=0 prs=0 r=0
>> qemu-system-ppc-5377  [056]  1421.687262: tlbie:
>> 	tlbie with lpid 1, local 0, rb=5f04edffa00c11c1, rs=1, ric=0 prs=0 r=0
> 
> My first reaction is "why the hell do we have so many open-coded
> tlbies". So step one might be to add a static inline helper, that way we
> don't have to add the trace_tlbie() in so many places.
> 

The problem is the variants

Hash uses the two operand variant and radix on p9 uses the 5 operand variant.
To make matters even more complex depending on the CPU_FTR_ARCH we also have
operand variants with L and a version where the immediate value was removed.
I've tried to encapsulate the traces with their variants, but that implied
having the traces at the various call sites

We also use tlbie* from assembly (like the one called by kvmppc_hv_entry),
which is not covered by these traces at the moment, but I guess we can
add them as well.

> Also in some of them you call trace_tlbie() before the
> eieio/tlbsync/ptesync. Which may not be wrong, but looks worrying at
> first glance.
> 

I'll try and make it consistent, but there are places like do_tlbies()
where we loop do the tlbie(l)'s and then do sync's. In those case
I've had to put the traces within the loop instead of queuing or looping
twice. We don't access any of the addresses flushed, just print the registers.

> But overall I guess it's OK. We'd want to do a quick benchmark to make
> sure it's not adding any overhead.

OK.. I'll try and find a benchmark and run it with traces disabled.

Thanks for the review
Balbir Singh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] [PATCH] Trace TLBIE's
  2016-11-23 11:40   ` Balbir Singh
@ 2017-04-06  5:32     ` Alistair Popple
  2017-04-06  6:03       ` Michael Ellerman
  0 siblings, 1 reply; 6+ messages in thread
From: Alistair Popple @ 2017-04-06  5:32 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Balbir Singh, Michael Ellerman, linuxppc-dev, Paul Mackerras,
	Nicholas Piggin

On Wed, 23 Nov 2016 10:40:36 PM Balbir Singh wrote:
>
> > But overall I guess it's OK. We'd want to do a quick benchmark to make
> > sure it's not adding any overhead.
>
> OK.. I'll try and find a benchmark and run it with traces disabled.

For what's it's worth I didn't notice any slow down running a NAMD
test with tracing disabled and ~400000 tlbie's in 34s. Turning tracing
on could have slowed things down ever so slightly, but I didn't notice
as it may have just been in the noise of the benchmark I was running
(I wasn't specifically looking at timing, hence the vagueness of the
remarks).

Otherwise the patch worked and would be useful - I have already had to
setup tlbie counting/tracing several times in the last 12 months.

Tested-by: Alistair Popple <alistair@popple.id.au>

- Alistair

> Thanks for the review
> Balbir Singh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] [PATCH] Trace TLBIE's
  2017-04-06  5:32     ` Alistair Popple
@ 2017-04-06  6:03       ` Michael Ellerman
  2017-04-06  6:23         ` Balbir Singh
  0 siblings, 1 reply; 6+ messages in thread
From: Michael Ellerman @ 2017-04-06  6:03 UTC (permalink / raw)
  To: Alistair Popple, linuxppc-dev
  Cc: Balbir Singh, linuxppc-dev, Paul Mackerras, Nicholas Piggin

Alistair Popple <alistair@popple.id.au> writes:

> On Wed, 23 Nov 2016 10:40:36 PM Balbir Singh wrote:
>>
>> > But overall I guess it's OK. We'd want to do a quick benchmark to make
>> > sure it's not adding any overhead.
>>
>> OK.. I'll try and find a benchmark and run it with traces disabled.
>
> For what's it's worth I didn't notice any slow down running a NAMD
> test with tracing disabled and ~400000 tlbie's in 34s. Turning tracing
> on could have slowed things down ever so slightly, but I didn't notice
> as it may have just been in the noise of the benchmark I was running
> (I wasn't specifically looking at timing, hence the vagueness of the
> remarks).
>
> Otherwise the patch worked and would be useful - I have already had to
> setup tlbie counting/tracing several times in the last 12 months.
>
> Tested-by: Alistair Popple <alistair@popple.id.au>

OK thanks.

To get it merged I'd like:
 - always called after the barriers.
 - no calls in loops, instead add a "count" field to the trace point and
   when we call it multiple times in a loop we pass the count.

So you'd see entries something like:

<...>-5141  [062]  1354.486693: tlbie: lpid=0, local=1, rb=0x7b5d0ff874f11f1, rs=0, ric=0, prs=0, r=0, count=128

cheers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC] [PATCH] Trace TLBIE's
  2017-04-06  6:03       ` Michael Ellerman
@ 2017-04-06  6:23         ` Balbir Singh
  0 siblings, 0 replies; 6+ messages in thread
From: Balbir Singh @ 2017-04-06  6:23 UTC (permalink / raw)
  To: Michael Ellerman, Alistair Popple, linuxppc-dev
  Cc: linuxppc-dev, Paul Mackerras, Nicholas Piggin

On Thu, 2017-04-06 at 16:03 +1000, Michael Ellerman wrote:
> Alistair Popple <alistair@popple.id.au> writes:
> 
> > On Wed, 23 Nov 2016 10:40:36 PM Balbir Singh wrote:
> > > 
> > > > But overall I guess it's OK. We'd want to do a quick benchmark to make
> > > > sure it's not adding any overhead.
> > > 
> > > OK.. I'll try and find a benchmark and run it with traces disabled.
> > 
> > For what's it's worth I didn't notice any slow down running a NAMD
> > test with tracing disabled and ~400000 tlbie's in 34s. Turning tracing
> > on could have slowed things down ever so slightly, but I didn't notice
> > as it may have just been in the noise of the benchmark I was running
> > (I wasn't specifically looking at timing, hence the vagueness of the
> > remarks).
> > 
> > Otherwise the patch worked and would be useful - I have already had to
> > setup tlbie counting/tracing several times in the last 12 months.
> > 
> > Tested-by: Alistair Popple <alistair@popple.id.au>
> 
> OK thanks.
> 
> To get it merged I'd like:
>  - always called after the barriers.
>  - no calls in loops, instead add a "count" field to the trace point and
>    when we call it multiple times in a loop we pass the count.
> 
> So you'd see entries something like:
> 
> <...>-5141  [062]  1354.486693: tlbie: lpid=0, local=1, rb=0x7b5d0ff874f11f1, rs=0, ric=0, prs=0, r=0, count=128
> 

I'll double check the patches and repost if required.

Balbir Singh.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-04-06  6:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-11-23  8:06 [RFC] [PATCH] Trace TLBIE's Balbir Singh
2016-11-23 10:15 ` Michael Ellerman
2016-11-23 11:40   ` Balbir Singh
2017-04-06  5:32     ` Alistair Popple
2017-04-06  6:03       ` Michael Ellerman
2017-04-06  6:23         ` Balbir Singh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).