public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Emil Tsalapatis <emil@etsalapatis.com>,
	Eduard Zingerman <eddyz87@gmail.com>,
	Andrii Nakryiko <andrii@kernel.org>
Cc: David Vernet <void@manifault.com>,
	Andrea Righi <arighi@nvidia.com>,
	Changwoo Min <changwoo@igalia.com>,
	bpf@vger.kernel.org, sched-ext@lists.linux.dev,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH 1/9] bpf/arena: Plumb struct bpf_arena * through PTE callbacks
Date: Mon, 27 Apr 2026 00:51:01 -1000	[thread overview]
Message-ID: <20260427105109.2554518-2-tj@kernel.org> (raw)
In-Reply-To: <20260427105109.2554518-1-tj@kernel.org>

A subsequent change needs the PTE callbacks in bpf_arena to consult
per-arena state. Make struct bpf_arena * reachable from each callback:

- apply_range_set_cb: add an arena field to apply_range_data. The
  data arg can no longer be NULL (it now carries arena), so the
  "skip PTE install" sentinel used by populate_pgtable_except_pte()
  shifts from data == NULL to data->pages == NULL.

- apply_range_clear_cb: introduce struct apply_range_clear_data
  { arena, free_pages } in place of the bare struct llist_head *
  arg.

- existing_page_cb: arena_map_free() passes arena instead of NULL.
  The callback does not read the new argument yet.

No behavior change.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/bpf/arena.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 08d008cc471e..02249d2514f8 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -114,16 +114,22 @@ static long compute_pgoff(struct bpf_arena *arena, long uaddr)
 }
 
 struct apply_range_data {
-	struct page **pages;
+	struct bpf_arena *arena;
+	struct page **pages;	/* NULL: skip PTE install */
 	int i;
 };
 
+struct apply_range_clear_data {
+	struct bpf_arena *arena;
+	struct llist_head *free_pages;
+};
+
 static int apply_range_set_cb(pte_t *pte, unsigned long addr, void *data)
 {
 	struct apply_range_data *d = data;
 	struct page *page;
 
-	if (!data)
+	if (!d->pages)
 		return 0;
 	/* sanity check */
 	if (unlikely(!pte_none(ptep_get(pte))))
@@ -144,8 +150,9 @@ static void flush_vmap_cache(unsigned long start, unsigned long size)
 	flush_cache_vmap(start, start + size);
 }
 
-static int apply_range_clear_cb(pte_t *pte, unsigned long addr, void *free_pages)
+static int apply_range_clear_cb(pte_t *pte, unsigned long addr, void *data)
 {
+	struct apply_range_clear_data *d = data;
 	pte_t old_pte;
 	struct page *page;
 
@@ -161,16 +168,18 @@ static int apply_range_clear_cb(pte_t *pte, unsigned long addr, void *free_pages
 	pte_clear(&init_mm, addr, pte);
 
 	/* Add page to the list so it is freed later */
-	if (free_pages)
-		__llist_add(&page->pcp_llist, free_pages);
+	if (d->free_pages)
+		__llist_add(&page->pcp_llist, d->free_pages);
 
 	return 0;
 }
 
 static int populate_pgtable_except_pte(struct bpf_arena *arena)
 {
+	struct apply_range_data data = { .arena = arena };
+
 	return apply_to_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
-				   KERN_VM_SZ - GUARD_SZ, apply_range_set_cb, NULL);
+				   KERN_VM_SZ - GUARD_SZ, apply_range_set_cb, &data);
 }
 
 static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
@@ -286,7 +295,7 @@ static void arena_map_free(struct bpf_map *map)
 	 * free those pages.
 	 */
 	apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
-				     KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL);
+				     KERN_VM_SZ - GUARD_SZ, existing_page_cb, arena);
 	free_vm_area(arena->kern_vm);
 	range_tree_destroy(&arena->rt);
 	bpf_map_area_free(arena);
@@ -388,7 +397,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 	if (ret)
 		goto out_unlock_sigsegv;
 
-	struct apply_range_data data = { .pages = &page, .i = 0 };
+	struct apply_range_data data = { .arena = arena, .pages = &page, .i = 0 };
 	/* Account into memcg of the process that created bpf_arena */
 	ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page);
 	if (ret) {
@@ -569,6 +578,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 		bpf_map_memcg_exit(old_memcg, new_memcg);
 		return 0;
 	}
+	data.arena = arena;
 	data.pages = pages;
 
 	if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
@@ -696,9 +706,13 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
 	range_tree_set(&arena->rt, pgoff, page_cnt);
 
 	init_llist_head(&free_pages);
+	struct apply_range_clear_data clear_data = {
+		.arena = arena,
+		.free_pages = &free_pages,
+	};
 	/* clear ptes and collect struct pages */
 	apply_to_existing_page_range(&init_mm, kaddr, page_cnt << PAGE_SHIFT,
-				     apply_range_clear_cb, &free_pages);
+				     apply_range_clear_cb, &clear_data);
 
 	/* drop the lock to do the tlb flush and zap pages */
 	raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
@@ -804,6 +818,11 @@ static void arena_free_worker(struct work_struct *work)
 	arena_vm_start = bpf_arena_get_kern_vm_start(arena);
 	user_vm_start = bpf_arena_get_user_vm_start(arena);
 
+	struct apply_range_clear_data clear_data = {
+		.arena = arena,
+		.free_pages = &free_pages,
+	};
+
 	list = llist_del_all(&arena->free_spans);
 	llist_for_each(pos, list) {
 		s = llist_entry(pos, struct arena_free_span, node);
@@ -813,7 +832,7 @@ static void arena_free_worker(struct work_struct *work)
 
 		/* clear ptes and collect pages in free_pages llist */
 		apply_to_existing_page_range(&init_mm, kaddr, page_cnt << PAGE_SHIFT,
-					     apply_range_clear_cb, &free_pages);
+					     apply_range_clear_cb, &clear_data);
 
 		range_tree_set(&arena->rt, pgoff, page_cnt);
 	}
-- 
2.53.0


  reply	other threads:[~2026-04-27 10:51 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-27 10:51 [RFC PATCH 0/9] bpf/arena: Direct kernel-side access Tejun Heo
2026-04-27 10:51 ` Tejun Heo [this message]
2026-04-27 10:51 ` [RFC PATCH 2/9] bpf/arena: Add BPF_F_ARENA_MAP_ALWAYS for direct kernel access Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 3/9] bpf: Add sleepable variant of bpf_arena_alloc_pages for kernel callers Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 4/9] bpf: Add bpf_struct_ops_for_each_prog() Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 5/9] bpf: Add bpf_prog_for_each_used_map() Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 6/9] bpf/arena: Add bpf_arena_map_kern_vm_start() Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 7/9] sched_ext: Require MAP_ALWAYS arena for cid-form schedulers Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 8/9] sched_ext: Sub-allocator over kernel-claimed BPF arena pages Tejun Heo
2026-04-27 10:51 ` [RFC PATCH 9/9] sched_ext: Convert ops.set_cmask() to arena-resident cmask Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260427105109.2554518-2-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=andrii@kernel.org \
    --cc=arighi@nvidia.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=changwoo@igalia.com \
    --cc=eddyz87@gmail.com \
    --cc=emil@etsalapatis.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=memxor@gmail.com \
    --cc=sched-ext@lists.linux.dev \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox