linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Borkmann <daniel@iogearbox.net>
To: Kaixu Xia <xiakaixu@huawei.com>,
	ast@plumgrid.com, davem@davemloft.net, acme@kernel.org,
	mingo@redhat.com, a.p.zijlstra@chello.nl,
	masami.hiramatsu.pt@hitachi.com, jolsa@kernel.org
Cc: wangnan0@huawei.com, linux-kernel@vger.kernel.org,
	pi3orama@163.com, hekuang@huawei.com
Subject: Re: [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter
Date: Thu, 30 Jul 2015 01:51:43 +0200	[thread overview]
Message-ID: <55B9670F.4060908@iogearbox.net> (raw)
In-Reply-To: <1438082255-60683-4-git-send-email-xiakaixu@huawei.com>

On 07/28/2015 01:17 PM, Kaixu Xia wrote:
> According to the perf_event_map_fd and index, the function
> bpf_perf_event_read() can convert the corresponding map
> value to the pointer to struct perf_event and return the
> Hardware PMU counter value.
>
> Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
> ---
>   include/linux/bpf.h        |  1 +
>   include/linux/perf_event.h |  3 ++-
>   include/uapi/linux/bpf.h   |  1 +
>   kernel/bpf/helpers.c       | 36 ++++++++++++++++++++++++++++++++++++
>   kernel/events/core.c       |  4 ++--
>   kernel/trace/bpf_trace.c   |  2 ++
>   6 files changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 3c9c0eb..516992c 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -190,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
>   extern const struct bpf_func_proto bpf_map_update_elem_proto;
>   extern const struct bpf_func_proto bpf_map_delete_elem_proto;
>
> +extern const struct bpf_func_proto bpf_perf_event_read_proto;
>   extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
>   extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
>   extern const struct bpf_func_proto bpf_tail_call_proto;
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 2ea4067..899abcb 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -662,7 +662,8 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
>   				int src_cpu, int dst_cpu);
>   extern u64 perf_event_read_value(struct perf_event *event,
>   				 u64 *enabled, u64 *running);
> -
> +extern void __perf_event_read(void *info);
> +extern u64 perf_event_count(struct perf_event *event);
>
>   struct perf_sample_data {
>   	/*
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 69a1f6b..b9b13ce 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -250,6 +250,7 @@ enum bpf_func_id {
>   	 * Return: 0 on success
>   	 */
>   	BPF_FUNC_get_current_comm,
> +	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
>   	__BPF_FUNC_MAX_ID,
>   };
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1447ec0..c40c5ea 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -182,3 +182,39 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
>   	.arg1_type	= ARG_PTR_TO_STACK,
>   	.arg2_type	= ARG_CONST_STACK_SIZE,
>   };
> +
> +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
> +{
> +	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> +	struct perf_event *event;
> +
> +	if (index >= array->map.max_entries)
> +		return -E2BIG;

Maybe unlikely(...), likewise below.

> +	event = (struct perf_event *)array->ptrs[index];
> +	if (!event)
> +		return -ENOENT;
> +
> +	if (event->state != PERF_EVENT_STATE_ACTIVE)
> +		return -EINVAL;
> +
> +	if (event->oncpu != raw_smp_processor_id() &&
> +	    event->ctx->task != current)
> +		return -EINVAL;
> +
> +	if (event->attr.inherit)
> +		return -EINVAL;
> +
> +	__perf_event_read(event);
> +
> +	return perf_event_count(event);

I believe this helper should rather go somewhere such as bpf_trace.c
(or under kernel/events/ ?), wouldn't we otherwise get a build error
when perf events are compiled out?

Anyway, I let perf folks comment on that (and the helper in general).

> +}
> +
> +const struct bpf_func_proto bpf_perf_event_read_proto = {
> +	.func		= bpf_perf_event_read,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_CONST_MAP_PTR,
> +	.arg2_type	= ARG_ANYTHING,
> +};
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 58f0d47..c926c6d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3177,7 +3177,7 @@ void perf_event_exec(void)
>   /*
>    * Cross CPU call to read the hardware event
>    */
> -static void __perf_event_read(void *info)
> +void __perf_event_read(void *info)

Does this need to be declared in a header file, no?

>   {
>   	struct perf_event *event = info;
>   	struct perf_event_context *ctx = event->ctx;
> @@ -3204,7 +3204,7 @@ static void __perf_event_read(void *info)
>   	raw_spin_unlock(&ctx->lock);
>   }
>
> -static inline u64 perf_event_count(struct perf_event *event)
> +u64 perf_event_count(struct perf_event *event)

Likewise? Should the inlining be preserved?

>   {
>   	if (event->pmu->count)
>   		return event->pmu->count(event);
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 88a041a..9cf094f 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -183,6 +183,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func

Btw, I'm wondering if the perf event map portions should actually better go
here into bpf_trace.c ...

>   		return bpf_get_trace_printk_proto();
>   	case BPF_FUNC_get_smp_processor_id:
>   		return &bpf_get_smp_processor_id_proto;
> +	case BPF_FUNC_perf_event_read:
> +		return &bpf_perf_event_read_proto;
>   	default:
>   		return NULL;
>   	}
>


  reply	other threads:[~2015-07-29 23:51 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-28 11:17 [PATCH v4 0/4] bpf: Introduce the new ability of eBPF programs to access hardware PMU counter Kaixu Xia
2015-07-28 11:17 ` [PATCH v4 1/4] bpf: Make the bpf_prog_array_map more generic Kaixu Xia
2015-07-29 23:17   ` Daniel Borkmann
2015-07-30  1:44     ` Alexei Starovoitov
2015-07-31  8:50       ` xiakaixu
2015-07-31 15:46         ` Alexei Starovoitov
2015-07-28 11:17 ` [PATCH v4 2/4] bpf: Add new bpf map type to store the pointer to struct perf_event Kaixu Xia
2015-07-29 23:30   ` Daniel Borkmann
2015-07-30  1:45     ` Alexei Starovoitov
2015-07-28 11:17 ` [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter Kaixu Xia
2015-07-29 23:51   ` Daniel Borkmann [this message]
2015-07-28 11:17 ` [PATCH v4 4/4] samples/bpf: example of get selected PMU counter value Kaixu Xia
2015-07-29 23:56   ` Daniel Borkmann
2015-07-30  0:08 ` [PATCH v4 0/4] bpf: Introduce the new ability of eBPF programs to access hardware PMU counter Daniel Borkmann
2015-07-30  1:50   ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55B9670F.4060908@iogearbox.net \
    --to=daniel@iogearbox.net \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=ast@plumgrid.com \
    --cc=davem@davemloft.net \
    --cc=hekuang@huawei.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=masami.hiramatsu.pt@hitachi.com \
    --cc=mingo@redhat.com \
    --cc=pi3orama@163.com \
    --cc=wangnan0@huawei.com \
    --cc=xiakaixu@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).