* [bpf-next v2 1/2] bpf: add runtime stats, max cost
@ 2022-12-15 4:32 xiangxia.m.yue
2022-12-15 4:32 ` [bpf-next v2 2/2] bpftool: " xiangxia.m.yue
2022-12-16 5:39 ` [bpf-next v2 1/2] bpf: " Yonghong Song
0 siblings, 2 replies; 8+ messages in thread
From: xiangxia.m.yue @ 2022-12-15 4:32 UTC (permalink / raw)
To: bpf
Cc: Tonghao Zhang, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Hou Tao
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Now users can enable the sysctl kernel.bpf_stats_enabled to fetch
run_time_ns and run_cnt, which makes it easy to calculate the average value.
In some cases, the max cost of a bpf prog invocation is more useful:
it shows whether there is a burst sysload or high CPU usage. This patch
introduces an update-stats helper.
$ bpftool --json --pretty p s
...
"run_max_cost_ns": 313367
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Hou Tao <houtao1@huawei.com>
---
v2: fix build warning
---
include/linux/filter.h | 29 ++++++++++++++++++++++-------
include/uapi/linux/bpf.h | 1 +
kernel/bpf/syscall.c | 10 +++++++++-
kernel/bpf/trampoline.c | 10 +---------
4 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index bf701976056e..886b65fcd4ac 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -556,6 +556,7 @@ struct bpf_prog_stats {
u64_stats_t cnt;
u64_stats_t nsecs;
u64_stats_t misses;
+ u64_stats_t max_cost;
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
@@ -578,6 +579,26 @@ typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
unsigned int (*bpf_func)(const void *,
const struct bpf_insn *));
+static inline void bpf_prog_update_stats(const struct bpf_prog *prog, u64 start)
+{
+ struct bpf_prog_stats *stats;
+ unsigned long flags;
+ u64 run_time, max_cost;
+
+ stats = this_cpu_ptr(prog->stats);
+ flags = u64_stats_update_begin_irqsave(&stats->syncp);
+
+ run_time = sched_clock() - start;
+ u64_stats_inc(&stats->cnt);
+ u64_stats_add(&stats->nsecs, run_time);
+
+ max_cost = u64_stats_read(&stats->max_cost);
+ if (max_cost < run_time)
+ u64_stats_set(&stats->max_cost, run_time);
+
+ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+}
+
static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
const void *ctx,
bpf_dispatcher_fn dfunc)
@@ -586,16 +607,10 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
cant_migrate();
if (static_branch_unlikely(&bpf_stats_enabled_key)) {
- struct bpf_prog_stats *stats;
u64 start = sched_clock();
- unsigned long flags;
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
- stats = this_cpu_ptr(prog->stats);
- flags = u64_stats_update_begin_irqsave(&stats->syncp);
- u64_stats_inc(&stats->cnt);
- u64_stats_add(&stats->nsecs, sched_clock() - start);
- u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ bpf_prog_update_stats(prog, start);
} else {
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
}
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 464ca3f01fe7..da4d1f2d7bc2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6259,6 +6259,7 @@ struct bpf_prog_info {
__u32 verified_insns;
__u32 attach_btf_obj_id;
__u32 attach_btf_id;
+ __u64 run_max_cost_ns;
} __attribute__((aligned(8)));
struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64131f88c553..06439b09863d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2105,6 +2105,7 @@ struct bpf_prog_kstats {
u64 nsecs;
u64 cnt;
u64 misses;
+ u64 max_cost;
};
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
@@ -2122,12 +2123,13 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
struct bpf_prog_kstats *stats)
{
u64 nsecs = 0, cnt = 0, misses = 0;
+ u64 max_cost = 0;
int cpu;
for_each_possible_cpu(cpu) {
const struct bpf_prog_stats *st;
unsigned int start;
- u64 tnsecs, tcnt, tmisses;
+ u64 tnsecs, tcnt, tmisses, tmax_cost;
st = per_cpu_ptr(prog->stats, cpu);
do {
@@ -2135,14 +2137,17 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
tnsecs = u64_stats_read(&st->nsecs);
tcnt = u64_stats_read(&st->cnt);
tmisses = u64_stats_read(&st->misses);
+ tmax_cost = u64_stats_read(&st->max_cost);
} while (u64_stats_fetch_retry(&st->syncp, start));
nsecs += tnsecs;
cnt += tcnt;
misses += tmisses;
+ max_cost = max(max_cost, tmax_cost);
}
stats->nsecs = nsecs;
stats->cnt = cnt;
stats->misses = misses;
+ stats->max_cost = max_cost;
}
#ifdef CONFIG_PROC_FS
@@ -2162,6 +2167,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
"prog_id:\t%u\n"
"run_time_ns:\t%llu\n"
"run_cnt:\t%llu\n"
+ "run_max_cost_ns:\t%llu\n"
"recursion_misses:\t%llu\n"
"verified_insns:\t%u\n",
prog->type,
@@ -2171,6 +2177,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
prog->aux->id,
stats.nsecs,
stats.cnt,
+ stats.max_cost,
stats.misses,
prog->aux->verified_insns);
}
@@ -3962,6 +3969,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
info.run_time_ns = stats.nsecs;
info.run_cnt = stats.cnt;
info.recursion_misses = stats.misses;
+ info.run_max_cost_ns = stats.max_cost;
info.verified_insns = prog->aux->verified_insns;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d6395215b849..4ddad462562e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -882,8 +882,6 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
static void notrace update_prog_stats(struct bpf_prog *prog,
u64 start)
{
- struct bpf_prog_stats *stats;
-
if (static_branch_unlikely(&bpf_stats_enabled_key) &&
/* static_key could be enabled in __bpf_prog_enter*
* and disabled in __bpf_prog_exit*.
@@ -891,13 +889,7 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
* Hence check that 'start' is valid.
*/
start > NO_START_TIME) {
- unsigned long flags;
-
- stats = this_cpu_ptr(prog->stats);
- flags = u64_stats_update_begin_irqsave(&stats->syncp);
- u64_stats_inc(&stats->cnt);
- u64_stats_add(&stats->nsecs, sched_clock() - start);
- u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ bpf_prog_update_stats(prog, start);
}
}
--
2.27.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [bpf-next v2 2/2] bpftool: add runtime stats, max cost
2022-12-15 4:32 [bpf-next v2 1/2] bpf: add runtime stats, max cost xiangxia.m.yue
@ 2022-12-15 4:32 ` xiangxia.m.yue
2022-12-16 5:39 ` [bpf-next v2 1/2] bpf: " Yonghong Song
1 sibling, 0 replies; 8+ messages in thread
From: xiangxia.m.yue @ 2022-12-15 4:32 UTC (permalink / raw)
To: bpf
Cc: Tonghao Zhang, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Hou Tao
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Hou Tao <houtao1@huawei.com>
---
tools/bpf/bpftool/prog.c | 1 +
tools/include/uapi/linux/bpf.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index cfc9fdc1e863..c7764ff4079c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -437,6 +437,7 @@ static void print_prog_header_json(struct bpf_prog_info *info, int fd)
if (info->run_time_ns) {
jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns);
jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt);
+ jsonw_uint_field(json_wtr, "run_max_cost_ns", info->run_max_cost_ns);
}
if (info->recursion_misses)
jsonw_uint_field(json_wtr, "recursion_misses", info->recursion_misses);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 464ca3f01fe7..da4d1f2d7bc2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6259,6 +6259,7 @@ struct bpf_prog_info {
__u32 verified_insns;
__u32 attach_btf_obj_id;
__u32 attach_btf_id;
+ __u64 run_max_cost_ns;
} __attribute__((aligned(8)));
struct bpf_map_info {
--
2.27.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [bpf-next v2 1/2] bpf: add runtime stats, max cost
2022-12-15 4:32 [bpf-next v2 1/2] bpf: add runtime stats, max cost xiangxia.m.yue
2022-12-15 4:32 ` [bpf-next v2 2/2] bpftool: " xiangxia.m.yue
@ 2022-12-16 5:39 ` Yonghong Song
2022-12-16 9:05 ` Tonghao Zhang
1 sibling, 1 reply; 8+ messages in thread
From: Yonghong Song @ 2022-12-16 5:39 UTC (permalink / raw)
To: xiangxia.m.yue, bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Hou Tao
On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Now user can enable sysctl kernel.bpf_stats_enabled to fetch
> run_time_ns and run_cnt. It's easy to calculate the average value.
>
> In some case, the max cost for bpf prog invoked, are more useful:
> is there a burst sysload or high cpu usage. This patch introduce
> a update stats helper.
I am not 100% sure about how this single max value will be useful
in general. A particular max_run_time_ns, if much bigger than average,
could be an outlier due to preemption/softirq etc.
What you really need might be a trend over time of the run_time
to capture the burst. You could do this by taking snapshot of
run_time_ns/run_cnt periodically and plot the trend of average
run_time_ns which might correlate with other system activity.
Maybe I missed some use cases for max_run_time_ns...
>
> $ bpftool --json --pretty p s
> ...
> "run_max_cost_ns": 313367
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Andrii Nakryiko <andrii@kernel.org>
> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> Cc: Song Liu <song@kernel.org>
> Cc: Yonghong Song <yhs@fb.com>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: KP Singh <kpsingh@kernel.org>
> Cc: Stanislav Fomichev <sdf@google.com>
> Cc: Hao Luo <haoluo@google.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Hou Tao <houtao1@huawei.com>
> ---
> v2: fix build warning
> ---
> include/linux/filter.h | 29 ++++++++++++++++++++++-------
> include/uapi/linux/bpf.h | 1 +
> kernel/bpf/syscall.c | 10 +++++++++-
> kernel/bpf/trampoline.c | 10 +---------
> 4 files changed, 33 insertions(+), 17 deletions(-)
>
[...]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [bpf-next v2 1/2] bpf: add runtime stats, max cost
2022-12-16 5:39 ` [bpf-next v2 1/2] bpf: " Yonghong Song
@ 2022-12-16 9:05 ` Tonghao Zhang
2022-12-16 16:06 ` Daniel Borkmann
0 siblings, 1 reply; 8+ messages in thread
From: Tonghao Zhang @ 2022-12-16 9:05 UTC (permalink / raw)
To: Yonghong Song
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Hou Tao
On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
>
>
>
> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > Now user can enable sysctl kernel.bpf_stats_enabled to fetch
> > run_time_ns and run_cnt. It's easy to calculate the average value.
> >
> > In some case, the max cost for bpf prog invoked, are more useful:
> > is there a burst sysload or high cpu usage. This patch introduce
> > a update stats helper.
>
> I am not 100% sure about how this single max value will be useful
> in general. A particular max_run_time_ns, if much bigger than average,
> could be an outlier due to preemption/softirq etc.
> What you really need might be a trend over time of the run_time
> to capture the burst. You could do this by taking snapshot of
Hi
If the bpf prog is invoked frequently, run_time_ns/run_cnt may not
increase much even when there is a high-cost invocation of the bpf prog.
The max cost value means there was at least one expensive run of the
bpf prog. We should pay attention to the most expensive run of the bpf
prog, especially when it is much higher than the average implied by
run_time_ns/run_cnt.
> run_time_ns/run_cnt periodically and plot the trend of average
> run_time_ns which might correlate with other system activity.
> Maybe I missed some use cases for max_run_time_ns...
>
> >
> > $ bpftool --json --pretty p s
> > ...
> > "run_max_cost_ns": 313367
> >
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Andrii Nakryiko <andrii@kernel.org>
> > Cc: Martin KaFai Lau <martin.lau@linux.dev>
> > Cc: Song Liu <song@kernel.org>
> > Cc: Yonghong Song <yhs@fb.com>
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: KP Singh <kpsingh@kernel.org>
> > Cc: Stanislav Fomichev <sdf@google.com>
> > Cc: Hao Luo <haoluo@google.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Cc: Hou Tao <houtao1@huawei.com>
> > ---
> > v2: fix build warning
> > ---
> > include/linux/filter.h | 29 ++++++++++++++++++++++-------
> > include/uapi/linux/bpf.h | 1 +
> > kernel/bpf/syscall.c | 10 +++++++++-
> > kernel/bpf/trampoline.c | 10 +---------
> > 4 files changed, 33 insertions(+), 17 deletions(-)
> >
> [...]
--
Best regards, Tonghao
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [bpf-next v2 1/2] bpf: add runtime stats, max cost
2022-12-16 9:05 ` Tonghao Zhang
@ 2022-12-16 16:06 ` Daniel Borkmann
2022-12-17 15:37 ` Tonghao Zhang
0 siblings, 1 reply; 8+ messages in thread
From: Daniel Borkmann @ 2022-12-16 16:06 UTC (permalink / raw)
To: Tonghao Zhang, Yonghong Song
Cc: bpf, Alexei Starovoitov, Andrii Nakryiko, Martin KaFai Lau,
Song Liu, Yonghong Song, John Fastabend, KP Singh,
Stanislav Fomichev, Hao Luo, Jiri Olsa, Hou Tao
On 12/16/22 10:05 AM, Tonghao Zhang wrote:
> On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
>> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
>>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>
>>> Now user can enable sysctl kernel.bpf_stats_enabled to fetch
>>> run_time_ns and run_cnt. It's easy to calculate the average value.
>>>
>>> In some case, the max cost for bpf prog invoked, are more useful:
>>> is there a burst sysload or high cpu usage. This patch introduce
>>> a update stats helper.
>>
>> I am not 100% sure about how this single max value will be useful
>> in general. A particular max_run_time_ns, if much bigger than average,
>> could be an outlier due to preemption/softirq etc.
>> What you really need might be a trend over time of the run_time
>> to capture the burst. You could do this by taking snapshot of
> Hi
> If the bpf prog is invoked frequently, the run_time_ns/run_cnt may
> not be increased too much while
> there is a maxcost in bpf prog. The max cost value means there is at
> least one high cost in bpf prog.
> we should take care of the most cost of bpf prog. especially, much
> more than run_time_ns/run_cnt.
But then again, see Yonghong's comment with regards to outliers. I
think what you're probably rather asking for is something like tracking
p50/p90/p99 run_time_ns numbers over time to get a better picture. Not
sure how single max cost would help, really..
>> run_time_ns/run_cnt periodically and plot the trend of average
>> run_time_ns which might correlate with other system activity.
>> Maybe I missed some use cases for max_run_time_ns...
>>
>>>
>>> $ bpftool --json --pretty p s
>>> ...
>>> "run_max_cost_ns": 313367
>>>
>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>> Cc: Alexei Starovoitov <ast@kernel.org>
>>> Cc: Daniel Borkmann <daniel@iogearbox.net>
>>> Cc: Andrii Nakryiko <andrii@kernel.org>
>>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
>>> Cc: Song Liu <song@kernel.org>
>>> Cc: Yonghong Song <yhs@fb.com>
>>> Cc: John Fastabend <john.fastabend@gmail.com>
>>> Cc: KP Singh <kpsingh@kernel.org>
>>> Cc: Stanislav Fomichev <sdf@google.com>
>>> Cc: Hao Luo <haoluo@google.com>
>>> Cc: Jiri Olsa <jolsa@kernel.org>
>>> Cc: Hou Tao <houtao1@huawei.com>
>>> ---
>>> v2: fix build warning
>>> ---
>>> include/linux/filter.h | 29 ++++++++++++++++++++++-------
>>> include/uapi/linux/bpf.h | 1 +
>>> kernel/bpf/syscall.c | 10 +++++++++-
>>> kernel/bpf/trampoline.c | 10 +---------
>>> 4 files changed, 33 insertions(+), 17 deletions(-)
>>>
>> [...]
>
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [bpf-next v2 1/2] bpf: add runtime stats, max cost
2022-12-16 16:06 ` Daniel Borkmann
@ 2022-12-17 15:37 ` Tonghao Zhang
2022-12-19 8:04 ` Song Liu
0 siblings, 1 reply; 8+ messages in thread
From: Tonghao Zhang @ 2022-12-17 15:37 UTC (permalink / raw)
To: Daniel Borkmann
Cc: Yonghong Song, bpf, Alexei Starovoitov, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Hou Tao
On Sat, Dec 17, 2022 at 12:07 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 12/16/22 10:05 AM, Tonghao Zhang wrote:
> > On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
> >> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >>>
> >>> Now user can enable sysctl kernel.bpf_stats_enabled to fetch
> >>> run_time_ns and run_cnt. It's easy to calculate the average value.
> >>>
> >>> In some case, the max cost for bpf prog invoked, are more useful:
> >>> is there a burst sysload or high cpu usage. This patch introduce
> >>> a update stats helper.
> >>
> >> I am not 100% sure about how this single max value will be useful
> >> in general. A particular max_run_time_ns, if much bigger than average,
> >> could be an outlier due to preemption/softirq etc.
> >> What you really need might be a trend over time of the run_time
> >> to capture the burst. You could do this by taking snapshot of
> > Hi
> > If the bpf prog is invoked frequently, the run_time_ns/run_cnt may
> > not be increased too much while
> > there is a maxcost in bpf prog. The max cost value means there is at
> > least one high cost in bpf prog.
> > we should take care of the most cost of bpf prog. especially, much
> > more than run_time_ns/run_cnt.
>
> But then again, see Yonghong's comment with regards to outliers. I
> think what you're probably rather asking for is something like tracking
> p50/p90/p99 run_time_ns numbers over time to get a better picture. Not
> sure how single max cost would help, really..
What I am asking is: is there a high CPU cost in the bpf prog? If the
bpf prog runs frequently, run_time_ns/cnt is not what we want, because
fetching bpf runtime stats frequently would itself impose a high
syscall CPU load, so we cannot invoke the syscall often. That is why I
need this max cost value. As Yonghong says, "if much bigger than
average, could be an outlier due to preemption/softirq etc." — that is
right, but I think there is another reason: the bpf prog may be badly
written and cause the issue, or the bpf prog may invoke a bpf helper
that takes a lot of CPU. In any case, this can help us debug the bpf
prog and know the max cost the prog takes. If needed, we can update
the commit message and send v3.
> >> run_time_ns/run_cnt periodically and plot the trend of average
> >> run_time_ns which might correlate with other system activity.
> >> Maybe I missed some use cases for max_run_time_ns...
> >>
> >>>
> >>> $ bpftool --json --pretty p s
> >>> ...
> >>> "run_max_cost_ns": 313367
> >>>
> >>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >>> Cc: Alexei Starovoitov <ast@kernel.org>
> >>> Cc: Daniel Borkmann <daniel@iogearbox.net>
> >>> Cc: Andrii Nakryiko <andrii@kernel.org>
> >>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> >>> Cc: Song Liu <song@kernel.org>
> >>> Cc: Yonghong Song <yhs@fb.com>
> >>> Cc: John Fastabend <john.fastabend@gmail.com>
> >>> Cc: KP Singh <kpsingh@kernel.org>
> >>> Cc: Stanislav Fomichev <sdf@google.com>
> >>> Cc: Hao Luo <haoluo@google.com>
> >>> Cc: Jiri Olsa <jolsa@kernel.org>
> >>> Cc: Hou Tao <houtao1@huawei.com>
> >>> ---
> >>> v2: fix build warning
> >>> ---
> >>> include/linux/filter.h | 29 ++++++++++++++++++++++-------
> >>> include/uapi/linux/bpf.h | 1 +
> >>> kernel/bpf/syscall.c | 10 +++++++++-
> >>> kernel/bpf/trampoline.c | 10 +---------
> >>> 4 files changed, 33 insertions(+), 17 deletions(-)
> >>>
> >> [...]
> >
> >
> >
>
--
Best regards, Tonghao
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [bpf-next v2 1/2] bpf: add runtime stats, max cost
2022-12-17 15:37 ` Tonghao Zhang
@ 2022-12-19 8:04 ` Song Liu
2022-12-19 18:24 ` Martin KaFai Lau
0 siblings, 1 reply; 8+ messages in thread
From: Song Liu @ 2022-12-19 8:04 UTC (permalink / raw)
To: Tonghao Zhang
Cc: Daniel Borkmann, Yonghong Song, bpf, Alexei Starovoitov,
Andrii Nakryiko, Martin KaFai Lau, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Hou Tao
On Sat, Dec 17, 2022 at 7:38 AM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Sat, Dec 17, 2022 at 12:07 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > On 12/16/22 10:05 AM, Tonghao Zhang wrote:
> > > On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
> > >> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> > >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >>>
> > >>> Now user can enable sysctl kernel.bpf_stats_enabled to fetch
> > >>> run_time_ns and run_cnt. It's easy to calculate the average value.
> > >>>
> > >>> In some case, the max cost for bpf prog invoked, are more useful:
> > >>> is there a burst sysload or high cpu usage. This patch introduce
> > >>> a update stats helper.
> > >>
> > >> I am not 100% sure about how this single max value will be useful
> > >> in general. A particular max_run_time_ns, if much bigger than average,
> > >> could be an outlier due to preemption/softirq etc.
> > >> What you really need might be a trend over time of the run_time
> > >> to capture the burst. You could do this by taking snapshot of
> > > Hi
> > > If the bpf prog is invoked frequently, the run_time_ns/run_cnt may
> > > not be increased too much while
> > > there is a maxcost in bpf prog. The max cost value means there is at
> > > least one high cost in bpf prog.
> > > we should take care of the most cost of bpf prog. especially, much
> > > more than run_time_ns/run_cnt.
> >
> > But then again, see Yonghong's comment with regards to outliers. I
> > think what you're probably rather asking for is something like tracking
> > p50/p90/p99 run_time_ns numbers over time to get a better picture. Not
> > sure how single max cost would help, really..
> What I am asking for is that is there a high cpu cost in bpf prog ? If
> the bpf prog run frequently,
> the run_time_ns/cnt is not what we want. because if we get bpf runtime
> stats frequently, there will
> be a high syscall cpu load. so we can't use syscall frequently. so why
> I need this max cost value, as
> yonghong say "if much bigger than average, could be an outlier due to
> preemption/softirq etc.". It is right.
> but I think there is another reason, the bpf prog may be too bad to
> cause the issue or bpf prog invoke a bpf helper which
> take a lot cpu. Anyway this can help us debug the bpf prog. and help
> us to know what max cost the prog take. If possible
> we can update the commit message and send v3.
kernel.bpf_stats_enabled is a relatively light weight monitoring interface.
One of the use cases is to enable it for a few seconds periodically, so
we can get an overview of all BPF programs in the system.
While max time cost might be useful in some debugging, I don't think
we should add it with kernel.bpf_stats_enabled. Otherwise, we can
argue p50/p90/p99 are also useful in some cases, and some other
metrics are useful in some other cases. These metrics together will
make kernel.bpf_stats_enabled too expensive for the use case above.
Since the use case is for debugging, have you considered using
some other BPF programs to profile the target BPF program?
Please refer to "bpftool prog profile" or "perf stat -b " for
examples of similar solutions. We may need to revise the following
check in bpf_check_attach_target() to make this work for some
scenarios:
if (tgt_prog->type == prog->type) {
/* Cannot fentry/fexit another fentry/fexit program.
* Cannot attach program extension to another extension.
* It's ok to attach fentry/fexit to extension program.
*/
bpf_log(log, "Cannot recursively attach\n");
return -EINVAL;
}
Thanks,
Song
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [bpf-next v2 1/2] bpf: add runtime stats, max cost
2022-12-19 8:04 ` Song Liu
@ 2022-12-19 18:24 ` Martin KaFai Lau
0 siblings, 0 replies; 8+ messages in thread
From: Martin KaFai Lau @ 2022-12-19 18:24 UTC (permalink / raw)
To: Tonghao Zhang
Cc: Daniel Borkmann, Yonghong Song, bpf, Alexei Starovoitov,
Andrii Nakryiko, Yonghong Song, John Fastabend, KP Singh,
Stanislav Fomichev, Hao Luo, Jiri Olsa, Hou Tao, Song Liu
On 12/19/22 12:04 AM, Song Liu wrote:
> While max time cost might be useful in some debugging, I don't think
> we should add it with kernel.bpf_stats_enabled. Otherwise, we can
> argue p50/p90/p99 are also useful in some cases, and some other
> metrics are useful in some other cases. These metrics together will
> make kernel.bpf_stats_enabled too expensive for the use case above.
>
> Since the use case is for debugging, have you considered using
> some other BPF programs to profile the target BPF program?
> Please refer to "bpftool prog profile" or "perf stat -b " for
> examples of similar solutions.
+1. Please consider other suggested solutions that the same can be done in a
more flexible way.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-12-19 18:24 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-12-15 4:32 [bpf-next v2 1/2] bpf: add runtime stats, max cost xiangxia.m.yue
2022-12-15 4:32 ` [bpf-next v2 2/2] bpftool: " xiangxia.m.yue
2022-12-16 5:39 ` [bpf-next v2 1/2] bpf: " Yonghong Song
2022-12-16 9:05 ` Tonghao Zhang
2022-12-16 16:06 ` Daniel Borkmann
2022-12-17 15:37 ` Tonghao Zhang
2022-12-19 8:04 ` Song Liu
2022-12-19 18:24 ` Martin KaFai Lau
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox