From: Puranjay Mohan <puranjay@kernel.org>
To: bpf@vger.kernel.org
Cc: Puranjay Mohan <puranjay@kernel.org>,
Puranjay Mohan <puranjay12@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>,
kernel-team@meta.com
Subject: [PATCH bpf-next v2 2/4] bpf: arena: use kmalloc_nolock() in place of kvcalloc()
Date: Fri, 14 Nov 2025 11:16:57 +0000 [thread overview]
Message-ID: <20251114111700.43292-3-puranjay@kernel.org> (raw)
In-Reply-To: <20251114111700.43292-1-puranjay@kernel.org>
To make arena_alloc_pages() safe to be called from any context, replace
kvcalloc() with kmalloc_nolock() so that it doesn't sleep or take any
locks. kmalloc_nolock() returns NULL for allocations larger than
KMALLOC_MAX_CACHE_SIZE, which is (PAGE_SIZE * 2) = 8KB on systems with
4KB pages. So, cap the allocation done by kmalloc_nolock() at 1024
pointers of 8 bytes each (8KB) and reuse the array in a loop.
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
kernel/bpf/arena.c | 76 +++++++++++++++++++++++++++++++---------------
1 file changed, 52 insertions(+), 24 deletions(-)
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 48b8ffba3c88..7fa6e40ab3fc 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -43,6 +43,8 @@
#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
+
struct bpf_arena {
struct bpf_map map;
u64 user_vm_start;
@@ -491,7 +493,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
/* user_vm_end/start are fixed before bpf prog runs */
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+ struct apply_range_data data;
struct page **pages = NULL;
+ long remaining, mapped = 0;
+ long alloc_pages;
long pgoff = 0;
u32 uaddr32;
int ret, i;
@@ -508,12 +513,16 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
return 0;
}
- /* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
- pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
+ /*
+ * Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed.
+ */
+ alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
+ pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
if (!pages)
return 0;
+ data.pages = pages;
- guard(mutex)(&arena->lock);
+ mutex_lock(&arena->lock);
if (uaddr) {
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
@@ -528,32 +537,51 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
if (ret)
goto out_free_pages;
- struct apply_range_data data = { .pages = pages, .i = 0 };
- ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
- if (ret)
- goto out;
-
+ remaining = page_cnt;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
- /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
- * will not overflow 32-bit. Lower 32-bit need to represent
- * contiguous user address range.
- * Map these pages at kern_vm_start base.
- * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
- * lower 32-bit and it's ok.
- */
- ret = apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
- page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
- if (ret) {
- for (i = 0; i < page_cnt; i++)
- __free_page(pages[i]);
- goto out;
+
+ while(remaining) {
+ long this_batch = min(remaining, alloc_pages);
+ /* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
+ memset(pages, 0, this_batch * sizeof(struct page *));
+ data.i = 0;
+
+ ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
+ if (ret)
+ goto out;
+
+ /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+ * will not overflow 32-bit. Lower 32-bit need to represent
+ * contiguous user address range.
+ * Map these pages at kern_vm_start base.
+ * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+ * lower 32-bit and it's ok.
+ */
+ ret = apply_to_page_range(&init_mm,
+ kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
+ this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
+ if (ret) {
+ /* data.i pages were mapped, account them and free the remaining */
+ mapped += data.i;
+ for (i = data.i; i < this_batch; i++)
+ __free_page(pages[i]);
+ goto out;
+ }
+
+ mapped += this_batch;
+ remaining -= this_batch;
}
- kvfree(pages);
+ mutex_unlock(&arena->lock);
+ kfree_nolock(pages);
return clear_lo32(arena->user_vm_start) + uaddr32;
out:
- range_tree_set(&arena->rt, pgoff, page_cnt);
+ range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
+ mutex_unlock(&arena->lock);
+ if (mapped)
+ arena_free_pages(arena, clear_lo32(arena->user_vm_start) + uaddr32, mapped);
out_free_pages:
- kvfree(pages);
+ mutex_unlock(&arena->lock);
+ kfree_nolock(pages);
return 0;
}
--
2.47.1
next prev parent reply other threads:[~2025-11-14 11:17 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-14 11:16 [PATCH bpf-next v2 0/4] Remove KF_SLEEPABLE from arena kfuncs Puranjay Mohan
2025-11-14 11:16 ` [PATCH bpf-next v2 1/4] bpf: arena: populate vm_area without allocating memory Puranjay Mohan
2025-11-14 11:47 ` bot+bpf-ci
2025-11-14 14:57 ` Puranjay Mohan
2025-11-14 21:21 ` Alexei Starovoitov
2025-11-15 0:52 ` Puranjay Mohan
2025-11-15 1:26 ` Alexei Starovoitov
2025-11-14 11:16 ` Puranjay Mohan [this message]
2025-11-14 11:39 ` [PATCH bpf-next v2 2/4] bpf: arena: use kmalloc_nolock() in place of kvcalloc() bot+bpf-ci
2025-11-14 15:13 ` Puranjay Mohan
2025-11-14 21:25 ` Alexei Starovoitov
2025-11-14 11:16 ` [PATCH bpf-next v2 3/4] bpf: arena: make arena kfuncs any context safe Puranjay Mohan
2025-11-14 11:47 ` bot+bpf-ci
2025-11-14 15:28 ` Puranjay Mohan
2025-11-14 21:27 ` Alexei Starovoitov
2025-11-15 0:56 ` Puranjay Mohan
2025-11-15 1:28 ` Alexei Starovoitov
2025-11-15 8:18 ` kernel test robot
2025-11-16 1:15 ` kernel test robot
2025-11-14 11:16 ` [PATCH bpf-next v2 4/4] selftests: bpf: test non-sleepable arena allocations Puranjay Mohan
2025-11-14 22:18 ` Alexei Starovoitov
2025-11-15 0:58 ` Puranjay Mohan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251114111700.43292-3-puranjay@kernel.org \
--to=puranjay@kernel.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=kernel-team@meta.com \
--cc=martin.lau@kernel.org \
--cc=memxor@gmail.com \
--cc=puranjay12@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox