All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next v2] bpf: Fix special fields mem leak for non-prealloc maps
@ 2026-02-16 13:13 Mykyta Yatsenko
  2026-02-16 17:56 ` Puranjay Mohan
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Mykyta Yatsenko @ 2026-02-16 13:13 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87, memxor
  Cc: Mykyta Yatsenko

From: Mykyta Yatsenko <yatsenko@meta.com>

cancel_and_free() uses xchg(&ptr, NULL) to steal the cb/ctx pointer.
For non-preallocated maps, the element is freed via bpf_mem_cache_free()
making it immediately available for reuse. Between the xchg and the
free, a concurrent BPF program sees NULL, treats it as first use,
allocates a new cb/ctx, and stores it via cmpxchg. The allocation is
then orphaned when the element is freed, causing a memory leak.

Use BPF_PTR_POISON instead of NULL in the xchg so that concurrent
readers can distinguish "being destroyed" from "never initialized"
and refuse to allocate. For preallocated maps, clear the poison
before recycled elements are handed back to BPF programs.

Fixes: 1bfbc267ec91 ("bpf: Enable bpf_timer and bpf_wq in any context")
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 include/linux/bpf.h   | 30 ++++++++++++++++++++++++++++++
 kernel/bpf/arraymap.c |  4 ++--
 kernel/bpf/hashtab.c  |  5 +++++
 kernel/bpf/helpers.c  | 25 +++++++++++++------------
 4 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd9b96434904..90006a910dd6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -671,6 +671,36 @@ static inline bool bpf_map_has_internal_structs(struct bpf_map *map)
 
 void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
 
+/*
+ * Clear special fields (timer/wq/task_work) poisoned by cancel_and_free()
+ * in recycled map elements.
+ *
+ * Uses WRITE_ONCE to match the READ_ONCE/xchg/cmpxchg used by concurrent
+ * readers on the underlying pointer (bpf_async_kern.cb / bpf_task_work_kern.ctx).
+ */
+static inline void bpf_obj_init_special_fields(const struct btf_record *rec, void *obj)
+{
+	void **p;
+
+	if (IS_ERR_OR_NULL(rec))
+		return;
+
+	if (rec->timer_off >= 0) {
+		p = obj + rec->timer_off;
+		WRITE_ONCE(*p, NULL);
+	}
+
+	if (rec->wq_off >= 0) {
+		p = obj + rec->wq_off;
+		WRITE_ONCE(*p, NULL);
+	}
+
+	if (rec->task_work_off >= 0) {
+		p = obj + rec->task_work_off;
+		WRITE_ONCE(*p, NULL);
+	}
+}
+
 int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
 				   struct bpf_dynptr *ptr__uninit);
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 67e9e811de3a..0ef68e5c4c90 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -386,7 +386,6 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
 		copy_map_value(map, val, value);
-		bpf_obj_free_fields(array->map.record, val);
 	} else {
 		val = array->value +
 			(u64)array->elem_size * (index & array->index_mask);
@@ -394,8 +393,9 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
 			copy_map_value_locked(map, val, value, false);
 		else
 			copy_map_value(map, val, value);
-		bpf_obj_free_fields(array->map.record, val);
 	}
+	bpf_obj_free_fields(array->map.record, val);
+	bpf_obj_init_special_fields(array->map.record, val);
 	return 0;
 }
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3b9d297a53be..7e93ece61296 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -300,6 +300,8 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 		bpf_map_inc_elem_count(&htab->map);
 		l = container_of(node, struct htab_elem, lru_node);
 		memcpy(l->key, key, htab->map.key_size);
+		bpf_obj_init_special_fields(htab->map.record,
+					    htab_elem_value(l, htab->map.key_size));
 		return l;
 	}
 
@@ -1033,6 +1035,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 	}
 
 	memcpy(l_new->key, key, key_size);
+	/* Clear special fields poisoned by cancel_and_free() in recycled elements */
+	bpf_obj_init_special_fields(htab->map.record, htab_elem_value(l_new, key_size));
+
 	if (percpu) {
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 7ac32798eb04..4e95bc085578 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1477,7 +1477,7 @@ static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback
 	struct bpf_async_cb *cb;
 
 	cb = READ_ONCE(async->cb);
-	if (!cb)
+	if (!cb || cb == BPF_PTR_POISON)
 		return -EINVAL;
 
 	return bpf_async_update_prog_callback(cb, prog, callback_fn);
@@ -1511,7 +1511,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, fla
 		return -EINVAL;
 
 	t = READ_ONCE(async->timer);
-	if (!t || !READ_ONCE(t->cb.prog))
+	if (!t || (void *)t == BPF_PTR_POISON || !READ_ONCE(t->cb.prog))
 		return -EINVAL;
 
 	if (flags & BPF_F_TIMER_ABS)
@@ -1557,7 +1557,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, async)
 		return -EOPNOTSUPP;
 
 	t = READ_ONCE(async->timer);
-	if (!t)
+	if (!t || (void *)t == BPF_PTR_POISON)
 		return -EINVAL;
 
 	cur_t = this_cpu_read(hrtimer_running);
@@ -1669,11 +1669,8 @@ static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
 {
 	struct bpf_async_cb *cb;
 
-	if (!READ_ONCE(async->cb))
-		return;
-
-	cb = xchg(&async->cb, NULL);
-	if (!cb)
+	cb = xchg(&async->cb, BPF_PTR_POISON);
+	if (!cb || cb == BPF_PTR_POISON)
 		return;
 
 	bpf_async_update_prog_callback(cb, NULL, NULL);
@@ -3183,7 +3180,7 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
 		return -EINVAL;
 
 	w = READ_ONCE(async->work);
-	if (!w || !READ_ONCE(w->cb.prog))
+	if (!w || (void *)w == BPF_PTR_POISON || !READ_ONCE(w->cb.prog))
 		return -EINVAL;
 
 	if (!refcount_inc_not_zero(&w->cb.refcnt))
@@ -4267,6 +4264,8 @@ static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *t
 	struct bpf_task_work_ctx *ctx, *old_ctx;
 
 	ctx = READ_ONCE(twk->ctx);
+	if (ctx == BPF_PTR_POISON)
+		return ERR_PTR(-EINVAL);
 	if (ctx)
 		return ctx;
 
@@ -4285,6 +4284,8 @@ static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *t
 		 * memory and try to reuse already set context.
 		 */
 		bpf_mem_free(&bpf_global_ma, ctx);
+		if (old_ctx == BPF_PTR_POISON)
+			return ERR_PTR(-EINVAL);
 		return old_ctx;
 	}
 
@@ -4476,7 +4477,7 @@ __bpf_kfunc int bpf_timer_cancel_async(struct bpf_timer *timer)
 	int ret;
 
 	cb = READ_ONCE(async->cb);
-	if (!cb)
+	if (!cb || cb == BPF_PTR_POISON)
 		return -EINVAL;
 
 	/*
@@ -4517,8 +4518,8 @@ void bpf_task_work_cancel_and_free(void *val)
 	struct bpf_task_work_ctx *ctx;
 	enum bpf_task_work_state state;
 
-	ctx = xchg(&twk->ctx, NULL);
-	if (!ctx)
+	ctx = xchg(&twk->ctx, BPF_PTR_POISON);
+	if (!ctx || ctx == BPF_PTR_POISON)
 		return;
 
 	state = xchg(&ctx->state, BPF_TW_FREED);
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-03-17  9:46 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-16 13:13 [PATCH bpf-next v2] bpf: Fix special fields mem leak for non-prealloc maps Mykyta Yatsenko
2026-02-16 17:56 ` Puranjay Mohan
2026-02-16 22:55 ` kernel test robot
2026-03-17  9:30 ` Varun R Mallya
2026-03-17  9:45   ` Kumar Kartikeya Dwivedi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.