public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
@ 2016-02-03 20:28 Martin KaFai Lau
  2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
  2016-02-09 12:20 ` [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics tip-bot for Martin KaFai Lau
  0 siblings, 2 replies; 5+ messages in thread
From: Martin KaFai Lau @ 2016-02-03 20:28 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Masami Hiramatsu, Steven Rostedt, Alexei Starovoitov,
	Josef Bacik, Kernel Team

When doing ebpf+kprobe on some hot TCP functions (e.g.
tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.

In kprobe_dispatcher(), there is a lot of cache bouncing
in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
the same cacheline.

perf report (cycles:pp):
8.30%  ipv4_dst_check
4.74%  copy_user_enhanced_fast_string
3.93%  dst_release
2.80%  tcp_v4_rcv
2.31%  queued_spin_lock_slowpath
2.30%  _raw_spin_lock
1.88%  mlx4_en_process_rx_cq
1.84%  eth_get_headlen
1.81%  ip_rcv_finish
~~~~
1.71%  kprobe_dispatcher
~~~~
1.55%  mlx4_en_xmit
1.09%  __probe_kernel_read

perf report after patch:
9.15%  ipv4_dst_check
5.00%  copy_user_enhanced_fast_string
4.12%  dst_release
2.96%  tcp_v4_rcv
2.50%  _raw_spin_lock
2.39%  queued_spin_lock_slowpath
2.11%  eth_get_headlen
2.03%  mlx4_en_process_rx_cq
1.69%  mlx4_en_xmit
1.19%  ip_rcv_finish
1.12%  __probe_kernel_read
1.02%  ehci_hcd_cleanup

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
---
 kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c995644..21b81a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,7 +30,7 @@
 struct trace_kprobe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long 		nhit;
+	unsigned long __percpu *nhit;
 	const char		*symbol;	/* symbol name */
 	struct trace_probe	tp;
 };
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 	if (!tk)
 		return ERR_PTR(ret);
 
+	tk->nhit = alloc_percpu(unsigned long);
+	if (!tk->nhit)
+		goto error;
+
 	if (symbol) {
 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
 		if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 error:
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 	return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
 	kfree(tk->tp.call.class->system);
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 }
 
@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_kprobe *tk = v;
+	unsigned long nhit = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		nhit += *per_cpu_ptr(tk->nhit, cpu);
 
 	seq_printf(m, "  %-44s %15lu %15lu\n",
-		   trace_event_name(&tk->tp.call), tk->nhit,
+		   trace_event_name(&tk->tp.call), nhit,
 		   tk->rp.kp.nmissed);
 
 	return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(tk, ri, regs);
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
  2016-02-03 20:28 [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics Martin KaFai Lau
@ 2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
  2016-02-04 13:19   ` Steven Rostedt
  2016-02-09 12:20 ` [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics tip-bot for Martin KaFai Lau
  1 sibling, 1 reply; 5+ messages in thread
From: 平松雅巳 / HIRAMATU,MASAMI @ 2016-02-04 10:24 UTC (permalink / raw)
  To: 'Martin KaFai Lau', linux-kernel@vger.kernel.org,
	Ingo Molnar
  Cc: Steven Rostedt, Alexei Starovoitov, Josef Bacik, Kernel Team

From: Martin KaFai Lau [mailto:kafai@fb.com]
>
>When doing ebpf+kprobe on some hot TCP functions (e.g.
>tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
>
>In kprobe_dispatcher(), there is a lot of cache bouncing
>in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
>the same cacheline.
>

Ingo, could you consider to merge this?

Thanks,


>perf report (cycles:pp):
>8.30%  ipv4_dst_check
>4.74%  copy_user_enhanced_fast_string
>3.93%  dst_release
>2.80%  tcp_v4_rcv
>2.31%  queued_spin_lock_slowpath
>2.30%  _raw_spin_lock
>1.88%  mlx4_en_process_rx_cq
>1.84%  eth_get_headlen
>1.81%  ip_rcv_finish
>~~~~
>1.71%  kprobe_dispatcher
>~~~~
>1.55%  mlx4_en_xmit
>1.09%  __probe_kernel_read
>
>perf report after patch:
>9.15%  ipv4_dst_check
>5.00%  copy_user_enhanced_fast_string
>4.12%  dst_release
>2.96%  tcp_v4_rcv
>2.50%  _raw_spin_lock
>2.39%  queued_spin_lock_slowpath
>2.11%  eth_get_headlen
>2.03%  mlx4_en_process_rx_cq
>1.69%  mlx4_en_xmit
>1.19%  ip_rcv_finish
>1.12%  __probe_kernel_read
>1.02%  ehci_hcd_cleanup
>
>Signed-off-by: Martin KaFai Lau <kafai@fb.com>
>Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
>---
> kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
>diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
>index c995644..21b81a4 100644
>--- a/kernel/trace/trace_kprobe.c
>+++ b/kernel/trace/trace_kprobe.c
>@@ -30,7 +30,7 @@
> struct trace_kprobe {
> 	struct list_head	list;
> 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
>-	unsigned long 		nhit;
>+	unsigned long __percpu *nhit;
> 	const char		*symbol;	/* symbol name */
> 	struct trace_probe	tp;
> };
>@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
> 	if (!tk)
> 		return ERR_PTR(ret);
>
>+	tk->nhit = alloc_percpu(unsigned long);
>+	if (!tk->nhit)
>+		goto error;
>+
> 	if (symbol) {
> 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
> 		if (!tk->symbol)
>@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
> error:
> 	kfree(tk->tp.call.name);
> 	kfree(tk->symbol);
>+	free_percpu(tk->nhit);
> 	kfree(tk);
> 	return ERR_PTR(ret);
> }
>@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
> 	kfree(tk->tp.call.class->system);
> 	kfree(tk->tp.call.name);
> 	kfree(tk->symbol);
>+	free_percpu(tk->nhit);
> 	kfree(tk);
> }
>
>@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
> static int probes_profile_seq_show(struct seq_file *m, void *v)
> {
> 	struct trace_kprobe *tk = v;
>+	unsigned long nhit = 0;
>+	int cpu;
>+
>+	for_each_possible_cpu(cpu)
>+		nhit += *per_cpu_ptr(tk->nhit, cpu);
>
> 	seq_printf(m, "  %-44s %15lu %15lu\n",
>-		   trace_event_name(&tk->tp.call), tk->nhit,
>+		   trace_event_name(&tk->tp.call), nhit,
> 		   tk->rp.kp.nmissed);
>
> 	return 0;
>@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
> {
> 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
>
>-	tk->nhit++;
>+	raw_cpu_inc(*tk->nhit);
>
> 	if (tk->tp.flags & TP_FLAG_TRACE)
> 		kprobe_trace_func(tk, regs);
>@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
> {
> 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
>
>-	tk->nhit++;
>+	raw_cpu_inc(*tk->nhit);
>
> 	if (tk->tp.flags & TP_FLAG_TRACE)
> 		kretprobe_trace_func(tk, ri, regs);
>--
>2.5.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
  2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
@ 2016-02-04 13:19   ` Steven Rostedt
  2016-02-09 10:09     ` Ingo Molnar
  0 siblings, 1 reply; 5+ messages in thread
From: Steven Rostedt @ 2016-02-04 13:19 UTC (permalink / raw)
  To: 平松雅巳 / HIRAMATU,MASAMI
  Cc: 'Martin KaFai Lau', linux-kernel@vger.kernel.org,
	Ingo Molnar, Alexei Starovoitov, Josef Bacik, Kernel Team

On Thu, 4 Feb 2016 10:24:12 +0000
平松雅巳 / HIRAMATU,MASAMI <masami.hiramatsu.pt@hitachi.com> wrote:

> From: Martin KaFai Lau [mailto:kafai@fb.com]
> >
> >When doing ebpf+kprobe on some hot TCP functions (e.g.
> >tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
> >
> >In kprobe_dispatcher(), there is a lot of cache bouncing
> >in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
> >the same cacheline.
> >  
> 
> Ingo, could you consider to merge this?
> 

Ingo, you can also add my

 Acked-by: Steven Rostedt <rostedt@goodmis.org>

-- Steve

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics
  2016-02-04 13:19   ` Steven Rostedt
@ 2016-02-09 10:09     ` Ingo Molnar
  0 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2016-02-09 10:09 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: 平松雅巳 / HIRAMATU,MASAMI,
	'Martin KaFai Lau', linux-kernel@vger.kernel.org,
	Ingo Molnar, Alexei Starovoitov, Josef Bacik, Kernel Team


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 4 Feb 2016 10:24:12 +0000
> 平松雅巳 / HIRAMATU,MASAMI <masami.hiramatsu.pt@hitachi.com> wrote:
> 
> > From: Martin KaFai Lau [mailto:kafai@fb.com]
> > >
> > >When doing ebpf+kprobe on some hot TCP functions (e.g.
> > >tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
> > >
> > >In kprobe_dispatcher(), there is a lot of cache bouncing
> > >in 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
> > >the same cacheline.
> > >  
> > 
> > Ingo, could you consider to merge this?
> > 
> 
> Ingo, you can also add my
> 
>  Acked-by: Steven Rostedt <rostedt@goodmis.org>

Yeah, nice optimization, picked it up - thanks guys!

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics
  2016-02-03 20:28 [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics Martin KaFai Lau
  2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
@ 2016-02-09 12:20 ` tip-bot for Martin KaFai Lau
  1 sibling, 0 replies; 5+ messages in thread
From: tip-bot for Martin KaFai Lau @ 2016-02-09 12:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, tglx, jbacik, hpa, kafai, kernel-team,
	masami.hiramatsu.pt, torvalds, rostedt, mingo, linux-kernel,
	alexei.starovoitov

Commit-ID:  a7636d9ecfa3ab7800a7c04c1f89378229eff609
Gitweb:     http://git.kernel.org/tip/a7636d9ecfa3ab7800a7c04c1f89378229eff609
Author:     Martin KaFai Lau <kafai@fb.com>
AuthorDate: Wed, 3 Feb 2016 12:28:28 -0800
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 9 Feb 2016 11:08:58 +0100

kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics

When doing ebpf+kprobe on some hot TCP functions (e.g.
tcp_rcv_established), the kprobe_dispatcher() function
shows up in 'perf report'.

In kprobe_dispatcher(), there is a lot of cache bouncing
on 'tk->nhit++'.  'tk->nhit' and 'tk->tp.flags' also share
the same cacheline.

perf report (cycles:pp):

	8.30%  ipv4_dst_check
	4.74%  copy_user_enhanced_fast_string
	3.93%  dst_release
	2.80%  tcp_v4_rcv
	2.31%  queued_spin_lock_slowpath
	2.30%  _raw_spin_lock
	1.88%  mlx4_en_process_rx_cq
	1.84%  eth_get_headlen
	1.81%  ip_rcv_finish
	~~~~
	1.71%  kprobe_dispatcher
	~~~~
	1.55%  mlx4_en_xmit
	1.09%  __probe_kernel_read

perf report after patch:

	9.15%  ipv4_dst_check
	5.00%  copy_user_enhanced_fast_string
	4.12%  dst_release
	2.96%  tcp_v4_rcv
	2.50%  _raw_spin_lock
	2.39%  queued_spin_lock_slowpath
	2.11%  eth_get_headlen
	2.03%  mlx4_en_process_rx_cq
	1.69%  mlx4_en_xmit
	1.19%  ip_rcv_finish
	1.12%  __probe_kernel_read
	1.02%  ehci_hcd_cleanup

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Kernel Team <kernel-team@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1454531308-2441898-1-git-send-email-kafai@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c995644..21b81a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,7 +30,7 @@
 struct trace_kprobe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long 		nhit;
+	unsigned long __percpu *nhit;
 	const char		*symbol;	/* symbol name */
 	struct trace_probe	tp;
 };
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 	if (!tk)
 		return ERR_PTR(ret);
 
+	tk->nhit = alloc_percpu(unsigned long);
+	if (!tk->nhit)
+		goto error;
+
 	if (symbol) {
 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
 		if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 error:
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 	return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
 	kfree(tk->tp.call.class->system);
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 }
 
@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_kprobe *tk = v;
+	unsigned long nhit = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		nhit += *per_cpu_ptr(tk->nhit, cpu);
 
 	seq_printf(m, "  %-44s %15lu %15lu\n",
-		   trace_event_name(&tk->tp.call), tk->nhit,
+		   trace_event_name(&tk->tp.call), nhit,
 		   tk->rp.kp.nmissed);
 
 	return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(tk, ri, regs);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-02-09 12:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-02-03 20:28 [PATCH RESEND] kprobe: Use percpu counter to collect nhit statistics Martin KaFai Lau
2016-02-04 10:24 ` 平松雅巳 / HIRAMATU,MASAMI
2016-02-04 13:19   ` Steven Rostedt
2016-02-09 10:09     ` Ingo Molnar
2016-02-09 12:20 ` [tip:perf/core] kprobes: Optimize hot path by using percpu counter to collect 'nhit' statistics tip-bot for Martin KaFai Lau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox