From: Puranjay Mohan <puranjay@kernel.org>
To: bpf@vger.kernel.org
Cc: Puranjay Mohan <puranjay@kernel.org>,
Puranjay Mohan <puranjay12@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>,
kernel-team@meta.com
Subject: [PATCH bpf-next v4 2/4] bpf: arena: use kmalloc_nolock() in place of kvcalloc()
Date: Fri, 12 Dec 2025 09:43:47 +0900
Message-ID: <20251212004350.6520-3-puranjay@kernel.org>
In-Reply-To: <20251212004350.6520-1-puranjay@kernel.org>

To make arena_alloc_pages() safe to call from any context, replace
kvcalloc() with kmalloc_nolock(), which neither sleeps nor takes any
locks. kmalloc_nolock() returns NULL for allocations larger than
KMALLOC_MAX_CACHE_SIZE, which is (PAGE_SIZE * 2) = 8KB on systems with
4KB pages. So, cap the pages array allocated by kmalloc_nolock() at
KMALLOC_MAX_CACHE_SIZE (1024 pointers of 8 bytes each on such systems)
and reuse the array across batches in a loop.
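
For illustration, the batching pattern reduces to the sketch below.
batched_alloc() is a hypothetical stand-alone helper, not code from
this patch; mapping of the allocated pages and unwinding of earlier
batches on error are elided:

  /* One scratch array capped at KMALLOC_MAX_CACHE_SIZE, reused per batch. */
  static int batched_alloc(struct bpf_map *map, int nid, long page_cnt)
  {
          long batch_max = min(page_cnt,
                               (long)(KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *)));
          long remaining = page_cnt;
          struct page **pages;
          int ret = 0;

          /* The size never exceeds KMALLOC_MAX_CACHE_SIZE, so
           * kmalloc_nolock() cannot fail due to the request size alone.
           */
          pages = kmalloc_nolock(batch_max * sizeof(struct page *), 0, NUMA_NO_NODE);
          if (!pages)
                  return -ENOMEM;

          while (remaining && !ret) {
                  long this_batch = min(remaining, batch_max);

                  /* alloc_pages_bulk() only fills NULL slots, so re-zero */
                  memset(pages, 0, this_batch * sizeof(struct page *));
                  ret = bpf_map_alloc_pages(map, nid, this_batch, pages);
                  /* ... map this_batch pages into the arena here ... */
                  remaining -= this_batch;
          }

          kfree_nolock(pages);
          return ret;
  }

With 4KB pages, e.g., a 1GB request (262144 pages) runs 256 batches of
1024 pages each while reusing the same 8KB array.
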
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
kernel/bpf/arena.c | 82 +++++++++++++++++++++++++++++++---------------
1 file changed, 56 insertions(+), 26 deletions(-)

diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 214a4da54162..a85f06a6a777 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -43,6 +43,8 @@
 #define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
 #define KERN_VM_SZ (SZ_4G + GUARD_SZ)
 
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
+
 struct bpf_arena {
         struct bpf_map map;
         u64 user_vm_start;
@@ -492,7 +494,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
         /* user_vm_end/start are fixed before bpf prog runs */
         long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
         u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+        struct apply_range_data data;
         struct page **pages = NULL;
+        long remaining, mapped = 0;
+        long alloc_pages;
         long pgoff = 0;
         u32 uaddr32;
         int ret, i;
@@ -509,17 +514,19 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
                 return 0;
         }
 
-        /* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
-        pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
+        /* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
+        alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
+        pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
         if (!pages)
                 return 0;
+        data.pages = pages;
 
-        guard(mutex)(&arena->lock);
+        mutex_lock(&arena->lock);
 
         if (uaddr) {
                 ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
                 if (ret)
-                        goto out_free_pages;
+                        goto out_unlock_free_pages;
                 ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
         } else {
                 ret = pgoff = range_tree_find(&arena->rt, page_cnt);
@@ -527,34 +534,57 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
                         ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
         }
         if (ret)
-                goto out_free_pages;
-
-        struct apply_range_data data = { .pages = pages, .i = 0 };
-        ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
-        if (ret)
-                goto out;
+                goto out_unlock_free_pages;
 
+        remaining = page_cnt;
         uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
-        /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
-         * will not overflow 32-bit. Lower 32-bit need to represent
-         * contiguous user address range.
-         * Map these pages at kern_vm_start base.
-         * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
-         * lower 32-bit and it's ok.
-         */
-        ret = apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
-                                  page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
-        if (ret) {
-                for (i = 0; i < page_cnt; i++)
-                        __free_page(pages[i]);
-                goto out;
+
+        while (remaining) {
+                long this_batch = min(remaining, alloc_pages);
+
+                /* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
+                memset(pages, 0, this_batch * sizeof(struct page *));
+
+                ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
+                if (ret)
+                        goto out;
+
+                /*
+                 * Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+                 * will not overflow 32-bit. Lower 32-bit need to represent
+                 * contiguous user address range.
+                 * Map these pages at kern_vm_start base.
+                 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+                 * lower 32-bit and it's ok.
+                 */
+                data.i = 0;
+                ret = apply_to_page_range(&init_mm,
+                                          kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
+                                          this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
+                if (ret) {
+                        /* data.i pages were mapped, account them and free the remaining */
+                        mapped += data.i;
+                        for (i = data.i; i < this_batch; i++)
+                                __free_page(pages[i]);
+                        goto out;
+                }
+
+                mapped += this_batch;
+                remaining -= this_batch;
         }
-        kvfree(pages);
+        mutex_unlock(&arena->lock);
+        kfree_nolock(pages);
         return clear_lo32(arena->user_vm_start) + uaddr32;
 out:
-        range_tree_set(&arena->rt, pgoff, page_cnt);
+        range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
+        mutex_unlock(&arena->lock);
+        if (mapped)
+                arena_free_pages(arena, clear_lo32(arena->user_vm_start) + uaddr32, mapped);
+        goto out_free_pages;
+out_unlock_free_pages:
+        mutex_unlock(&arena->lock);
 out_free_pages:
-        kvfree(pages);
+        kfree_nolock(pages);
         return 0;
 }
--
2.50.1