All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Tobin C. Harding" <tobin@apporbit.com>
To: Song Liu <songliubraving@fb.com>
Cc: netdev@vger.kernel.org, kernel-team@fb.com, qinteng@fb.com,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Peter Zijlstra <peterz@infradead.org>
Subject: Re: [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context
Date: Thu, 3 May 2018 17:03:57 +1000	[thread overview]
Message-ID: <20180503070357.GO3791@eros> (raw)
In-Reply-To: <20180502232030.3788284-2-songliubraving@fb.com>

On Wed, May 02, 2018 at 04:20:29PM -0700, Song Liu wrote:
> Currently, we cannot parse build_id in nmi context because of
> up_read(&current->mm->mmap_sem), this makes stackmap with build_id
> less useful. This patch enables parsing build_id in nmi by putting
> the up_read() call in irq_work. To avoid memory allocation in nmi
> context, we use a per-cpu variable for the irq_work. As a result, only
> one irq_work per cpu is allowed. If the irq_work is in use, we
> fall back to reporting only ips.
> 
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  init/Kconfig          |  1 +
>  kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 54 insertions(+), 6 deletions(-)
> 
> diff --git a/init/Kconfig b/init/Kconfig
> index f013afc..480a4f2 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1391,6 +1391,7 @@ config BPF_SYSCALL
>  	bool "Enable bpf() system call"
>  	select ANON_INODES
>  	select BPF
> +	select IRQ_WORK
>  	default n
>  	help
>  	  Enable the bpf() system call that allows to manipulate eBPF
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 3ba102b..51d4aea 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -11,6 +11,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/elf.h>
>  #include <linux/pagemap.h>
> +#include <linux/irq_work.h>
>  #include "percpu_freelist.h"
>  
>  #define STACK_CREATE_FLAG_MASK					\
> @@ -32,6 +33,23 @@ struct bpf_stack_map {
>  	struct stack_map_bucket *buckets[];
>  };
>  
> +/* irq_work to run up_read() for build_id lookup in nmi context */
> +struct stack_map_irq_work {
> +	struct irq_work irq_work;
> +	struct rw_semaphore *sem;
> +};
> +
> +static void do_up_read(struct irq_work *entry)
> +{
> +	struct stack_map_irq_work *work = container_of(entry,
> +			struct stack_map_irq_work, irq_work);

Perhaps split the declaration from the assignment so that the
container_of() call fits on one line:
	struct stack_map_irq_work *work;

	work = container_of(entry, struct stack_map_irq_work, irq_work);
> +	up_read(work->sem);
> +	work->sem = NULL;
> +}
> +
> +static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
> +
>  static inline bool stack_map_use_build_id(struct bpf_map *map)
>  {
>  	return (map->map_flags & BPF_F_STACK_BUILD_ID);
> @@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>  {
>  	int i;
>  	struct vm_area_struct *vma;
> +	bool in_nmi_ctx = in_nmi();
> +	bool irq_work_busy = false;
> +	struct stack_map_irq_work *work;
> +
> +	if (in_nmi_ctx) {
> +		work = this_cpu_ptr(&up_read_work);
> +		if (work->irq_work.flags & IRQ_WORK_BUSY)
> +			/* cannot queue more up_read, fallback */
> +			irq_work_busy = true;
> +	}
>  
>  	/*
> -	 * We cannot do up_read() in nmi context, so build_id lookup is
> -	 * only supported for non-nmi events. If at some point, it is
> -	 * possible to run find_vma() without taking the semaphore, we
> -	 * would like to allow build_id lookup in nmi context.
> +	 * We cannot do up_read() in nmi context. To do build_id lookup
> +	 * in nmi context, we need to run up_read() in irq_work. We use
> +	 * a percpu variable to do the irq_work. If the irq_work is
> +	 * already used by another lookup, we fall back to report ips.
>  	 *
>  	 * Same fallback is used for kernel stack (!user) on a stackmap
>  	 * with build_id.
>  	 */
> -	if (!user || !current || !current->mm || in_nmi() ||
> +	if (!user || !current || !current->mm || irq_work_busy ||
>  	    down_read_trylock(&current->mm->mmap_sem) == 0) {
>  		/* cannot access current->mm, fall back to ips */
>  		for (i = 0; i < trace_nr; i++) {
> @@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>  			- vma->vm_start;
>  		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>  	}
> -	up_read(&current->mm->mmap_sem);
> +
> +	if (!in_nmi_ctx)
> +		up_read(&current->mm->mmap_sem);
> +	else {

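Perhaps use braces on both branches, since the else branch already
has them (see the kernel coding style on braces):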
	if (!in_nmi_ctx) {
		up_read(&current->mm->mmap_sem);
	} else {


Hope this helps,
Tobin.

  reply	other threads:[~2018-05-03  7:04 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-02 23:20 [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
2018-05-03  7:03   ` Tobin C. Harding [this message]
2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
2018-05-03  7:19   ` Tobin C. Harding
2018-05-04  6:41     ` Song Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180503070357.GO3791@eros \
    --to=tobin@apporbit.com \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=qinteng@fb.com \
    --cc=songliubraving@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.