bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: bot+bpf-ci@kernel.org
To: puranjay@kernel.org,bpf@vger.kernel.org
Cc: puranjay@kernel.org,puranjay12@gmail.com,ast@kernel.org,andrii@kernel.org,daniel@iogearbox.net,martin.lau@kernel.org,eddyz87@gmail.com,memxor@gmail.com,kernel-team@meta.com,ast@kernel.org,andrii@kernel.org,daniel@iogearbox.net,martin.lau@kernel.org,eddyz87@gmail.com,yonghong.song@linux.dev,clm@meta.com,ihor.solodrai@linux.dev
Subject: Re: [PATCH bpf-next v4 3/4] bpf: arena: make arena kfuncs any context safe
Date: Fri, 12 Dec 2025 01:05:08 +0000 (UTC)	[thread overview]
Message-ID: <c5daf712b2a51bf284ff8b01a0a0cf3a74b9bf30873dcdf83a8b4a5875c4427a@mail.kernel.org> (raw)
In-Reply-To: <20251212004350.6520-4-puranjay@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 4786 bytes --]

> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 28d8d6b7bb1e..e873dc34fc2f 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -673,6 +673,21 @@ void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
>  int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
>  				   struct bpf_dynptr *ptr__uninit);
>
> +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
> +void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id,
> +					  u64 flags);
> +void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt);
> +#else
> +static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
> +							int node_id, u64 flags)
> +{
> +}

Should bpf_arena_alloc_pages_non_sleepable() return NULL here? The
function is declared to return void * but has no return statement, and
in C, using the value of a non-void function that falls off the end
without returning is undefined behavior.

> +
> +static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
> +{
> +}
> +#endif

[ ... ]

> diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c

[ ... ]

> -static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
> +static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
>  {
>  	u64 full_uaddr, uaddr_end;
> -	long kaddr, pgoff, i;
> +	long kaddr, pgoff;
>  	struct page *page;
> +	struct llist_head free_pages;
> +	struct llist_node *pos, *t;
> +	struct arena_free_span *s;
> +	unsigned long flags;
> +	int ret = 0;
>
>  	/* only aligned lower 32-bit are relevant */
>  	uaddr = (u32)uaddr;
>  	uaddr &= PAGE_MASK;
> +	kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
>  	full_uaddr = clear_lo32(arena->user_vm_start) + uaddr;
>  	uaddr_end = min(arena->user_vm_end, full_uaddr + (page_cnt << PAGE_SHIFT));
>  	if (full_uaddr >= uaddr_end)
>  		return;
>
>  	page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;
> +	pgoff = compute_pgoff(arena, uaddr);
>
> -	guard(mutex)(&arena->lock);
> +	if (!sleepable)
> +		goto defer;
> +
> +	ret = raw_res_spin_lock_irqsave(&arena->spinlock, flags);
> +
> +	/* Can't proceed without holding the spinlock so defer the free */
> +	if (ret)
> +		goto defer;
>
> -	pgoff = compute_pgoff(arena, uaddr);
> -	/* clear range */
>  	range_tree_set(&arena->rt, pgoff, page_cnt);
>
> +	init_llist_head(&free_pages);
> +	/* clear ptes and collect struct pages */
> +	apply_to_existing_page_range(&init_mm, kaddr, page_cnt << PAGE_SHIFT,
> +				     apply_range_clear_cb, &free_pages);
> +
> +	/* drop the lock to do the tlb flush and zap pages */
> +	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
> +
> +	/* ensure no stale TLB entries */
> +	flush_tlb_kernel_range(kaddr, kaddr + (page_cnt * PAGE_SIZE));
> +
>  	if (page_cnt > 1)
>  		/* bulk zap if multiple pages being freed */
>  		zap_pages(arena, full_uaddr, page_cnt);
>
> -	kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
> -	for (i = 0; i < page_cnt; i++, kaddr += PAGE_SIZE, full_uaddr += PAGE_SIZE) {
> -		page = vmalloc_to_page((void *)kaddr);
> -		if (!page)
> -			continue;
> +	llist_for_each_safe(pos, t, __llist_del_all(&free_pages)) {
> +		page = llist_entry(pos, struct page, pcp_llist);
>  		if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
>  			/* Optimization for the common case of page_cnt==1:
>  			 * If page wasn't mapped into some user vma there
> @@ -640,9 +697,20 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt
>  			 * page_cnt is big it's faster to do the batched zap.
>  			 */
>  			zap_pages(arena, full_uaddr, 1);
> -		apply_to_existing_page_range(&init_mm, kaddr, PAGE_SIZE, apply_range_clear_cb,
> -					     NULL);
> +		__free_page(page);
>  	}
> +
> +	return;
> +
> +defer:
> +	s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1);
> +	if (!s)
> +		return;

Does this leak pages when kmalloc_nolock() fails? The function returns
without clearing the PTEs, flushing TLB, or freeing the pages. The
range_tree bits are also not set back to free, so these pages become
permanently inaccessible until the arena is destroyed.

In the non-sleepable path when kmalloc_nolock() returns NULL:
- Pages remain allocated in the arena
- PTEs are not cleared
- range_tree bits remain cleared (pages marked as allocated)
- No retry mechanism exists to free these pages later

> +
> +	s->page_cnt = page_cnt;
> +	s->uaddr = uaddr;
> +	llist_add(&s->node, &arena->free_spans);
> +	irq_work_queue(&arena->free_irq);
>  }

[ ... ]


---
AI reviewed your patch. Please fix the bug, or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20152458491

  reply	other threads:[~2025-12-12  1:05 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-12  0:43 [PATCH bpf-next v4 0/4] Remove KF_SLEEPABLE from arena kfuncs Puranjay Mohan
2025-12-12  0:43 ` [PATCH bpf-next v4 1/4] bpf: arena: populate vm_area without allocating memory Puranjay Mohan
2025-12-12  1:05   ` bot+bpf-ci
2025-12-12  3:17     ` Puranjay Mohan
2025-12-12  0:43 ` [PATCH bpf-next v4 2/4] bpf: arena: use kmalloc_nolock() in place of kvcalloc() Puranjay Mohan
2025-12-12  0:43 ` [PATCH bpf-next v4 3/4] bpf: arena: make arena kfuncs any context safe Puranjay Mohan
2025-12-12  1:05   ` bot+bpf-ci [this message]
2025-12-12  3:16     ` Puranjay Mohan
2025-12-12  0:43 ` [PATCH bpf-next v4 4/4] selftests: bpf: test non-sleepable arena allocations Puranjay Mohan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c5daf712b2a51bf284ff8b01a0a0cf3a74b9bf30873dcdf83a8b4a5875c4427a@mail.kernel.org \
    --to=bot+bpf-ci@kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=clm@meta.com \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=ihor.solodrai@linux.dev \
    --cc=kernel-team@meta.com \
    --cc=martin.lau@kernel.org \
    --cc=memxor@gmail.com \
    --cc=puranjay12@gmail.com \
    --cc=puranjay@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).