* [PATCH bpf-next v2 1/2] bpf: syscall: Introduce memcg enter/exit helpers
From: Puranjay Mohan @ 2025-12-31 14:14 UTC
To: bpf
Cc: Puranjay Mohan, Alexei Starovoitov,
Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau,
Eduard Zingerman, Kumar Kartikeya Dwivedi, kernel-team
Introduce bpf_map_memcg_enter() and bpf_map_memcg_exit() helpers to
reduce code duplication in memcg context management.
bpf_map_memcg_enter() gets the memcg from the map, sets it as active, and
returns both the previous and the newly active memcg.
bpf_map_memcg_exit() restores the previously active memcg and releases the
reference acquired by bpf_map_memcg_enter().
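The resulting usage pattern, condensed from the conversions in the diff
below (the wrapper name here is illustrative, not part of the patch):

static void *example_map_alloc(const struct bpf_map *map, size_t size)
{
        struct mem_cgroup *memcg, *old_memcg;
        void *ptr;

        bpf_map_memcg_enter(map, &old_memcg, &memcg);
        /* Any allocation between enter and exit that passes __GFP_ACCOUNT
         * is charged to the memcg of the process that created the map.
         */
        ptr = kzalloc(size, GFP_KERNEL | __GFP_ACCOUNT);
        bpf_map_memcg_exit(old_memcg, memcg);

        return ptr;
}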
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
include/linux/bpf.h | 15 +++++++++++++
kernel/bpf/syscall.c | 50 +++++++++++++++++++++++---------------------
2 files changed, 41 insertions(+), 24 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4e7d72dfbcd4..24a32b1043d1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2608,6 +2608,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
unsigned long nr_pages, struct page **page_array);
#ifdef CONFIG_MEMCG
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg);
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2632,6 +2636,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
kvcalloc(_n, _size, _flags)
#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
__alloc_percpu_gfp(_size, _align, _flags)
+static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg)
+{
+ *new_memcg = NULL;
+ *old_memcg = NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *memcg)
+{
+}
#endif
static inline int
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a4d38272d8bc..63849a06518b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -505,17 +505,29 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
return root_mem_cgroup;
}
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+ struct mem_cgroup **new_memcg)
+{
+ *new_memcg = bpf_map_get_memcg(map);
+ *old_memcg = set_active_memcg(*new_memcg);
+}
+
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+ struct mem_cgroup *new_memcg)
+{
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(new_memcg);
+}
+
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node)
{
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -526,11 +538,9 @@ void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -540,11 +550,9 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kzalloc(size, flags | __GFP_ACCOUNT);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -555,11 +563,9 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -570,11 +576,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
struct mem_cgroup *memcg, *old_memcg;
void __percpu *ptr;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
return ptr;
}
@@ -615,8 +619,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg, *old_memcg;
- memcg = bpf_map_get_memcg(map);
- old_memcg = set_active_memcg(memcg);
+ bpf_map_memcg_enter(map, &old_memcg, &memcg);
#endif
for (i = 0; i < nr_pages; i++) {
pg = __bpf_alloc_page(nid);
@@ -632,8 +635,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
}
#ifdef CONFIG_MEMCG
- set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
+ bpf_map_memcg_exit(old_memcg, memcg);
#endif
return ret;
}
--
2.47.3
* [PATCH bpf-next v2 2/2] bpf: arena: Reintroduce memcg accounting
From: Puranjay Mohan @ 2025-12-31 14:14 UTC
To: bpf
Cc: Puranjay Mohan, Alexei Starovoitov,
Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau,
Eduard Zingerman, Kumar Kartikeya Dwivedi, kernel-team
When arena allocations were converted from bpf_map_alloc_pages() to
kmalloc_nolock() to support non-sleepable contexts, memcg accounting was
inadvertently lost. Restore it so that all arena-related allocations are
once again charged to the memcg of the process that created the bpf_arena.
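Schematically, each arena kfunc now brackets its work with the helpers
from the previous patch, and the inner allocations pass __GFP_ACCOUNT so
they are charged to the active memcg. Condensed from the diff below:

__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign,
                                        u32 page_cnt, int node_id, u64 flags)
{
        struct bpf_map *map = p__map;
        struct mem_cgroup *new_memcg, *old_memcg;
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
        void *ret;

        if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
                return NULL;

        bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
        ret = (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt,
                                        node_id, true);
        bpf_map_memcg_exit(old_memcg, new_memcg);

        return ret;
}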
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
kernel/bpf/arena.c | 44 ++++++++++++++++++++++++++++++++++++-----
kernel/bpf/range_tree.c | 5 +++--
2 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 456ac989269d..45b55961683f 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -360,6 +360,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
{
struct bpf_map *map = vmf->vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+ struct mem_cgroup *new_memcg, *old_memcg;
struct page *page;
long kbase, kaddr;
unsigned long flags;
@@ -377,6 +378,8 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
/* already have a page vmap-ed */
goto out;
+ bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
+
if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
/* User space requested to segfault when page is not allocated by bpf prog */
goto out_unlock_sigsegv;
@@ -400,12 +403,14 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
goto out_unlock_sigsegv;
}
flush_vmap_cache(kaddr, PAGE_SIZE);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
out:
page_ref_add(page, 1);
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
vmf->page = page;
return 0;
out_unlock_sigsegv:
+ bpf_map_memcg_exit(old_memcg, new_memcg);
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
return VM_FAULT_SIGSEGV;
}
@@ -557,7 +562,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
/* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
- pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
+ pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE);
if (!pages)
return 0;
data.pages = pages;
@@ -713,7 +718,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
return;
defer:
- s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1);
+ s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1);
if (!s)
/*
* If allocation fails in non-sleepable context, pages are intentionally left
@@ -766,6 +771,7 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
static void arena_free_worker(struct work_struct *work)
{
struct bpf_arena *arena = container_of(work, struct bpf_arena, free_work);
+ struct mem_cgroup *new_memcg, *old_memcg;
struct llist_node *list, *pos, *t;
struct arena_free_span *s;
u64 arena_vm_start, user_vm_start;
@@ -780,6 +786,8 @@ static void arena_free_worker(struct work_struct *work)
return;
}
+ bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
init_llist_head(&free_pages);
arena_vm_start = bpf_arena_get_kern_vm_start(arena);
user_vm_start = bpf_arena_get_user_vm_start(arena);
@@ -820,6 +828,8 @@ static void arena_free_worker(struct work_struct *work)
page = llist_entry(pos, struct page, pcp_llist);
__free_page(page);
}
+
+ bpf_map_memcg_exit(old_memcg, new_memcg);
}
static void arena_free_irq(struct irq_work *iw)
@@ -834,49 +844,69 @@ __bpf_kfunc_start_defs();
__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
int node_id, u64 flags)
{
+ void *ret;
struct bpf_map *map = p__map;
+ struct mem_cgroup *new_memcg, *old_memcg;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
return NULL;
- return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, true);
+ bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
+ ret = (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, true);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+
+ return ret;
}
void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
int node_id, u64 flags)
{
+ void *ret;
struct bpf_map *map = p__map;
+ struct mem_cgroup *new_memcg, *old_memcg;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
return NULL;
- return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, false);
+ bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
+ ret = (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, false);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+
+ return ret;
}
__bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
struct bpf_map *map = p__map;
+ struct mem_cgroup *new_memcg, *old_memcg;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
return;
+ bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
arena_free_pages(arena, (long)ptr__ign, page_cnt, true);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
}
void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
{
struct bpf_map *map = p__map;
+ struct mem_cgroup *new_memcg, *old_memcg;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
return;
+ bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
arena_free_pages(arena, (long)ptr__ign, page_cnt, false);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
}
__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
+ int ret;
struct bpf_map *map = p__map;
+ struct mem_cgroup *new_memcg, *old_memcg;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
if (map->map_type != BPF_MAP_TYPE_ARENA)
@@ -885,7 +915,11 @@ __bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_c
if (!page_cnt)
return 0;
- return arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
+ bpf_map_memcg_enter(map, &old_memcg, &new_memcg);
+ ret = arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
+ bpf_map_memcg_exit(old_memcg, new_memcg);
+
+ return ret;
}
__bpf_kfunc_end_defs();
diff --git a/kernel/bpf/range_tree.c b/kernel/bpf/range_tree.c
index 99c63d982c5d..2f28886f3ff7 100644
--- a/kernel/bpf/range_tree.c
+++ b/kernel/bpf/range_tree.c
@@ -149,7 +149,8 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
range_it_insert(rn, rt);
/* Add a range */
- new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+ new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT,
+ NUMA_NO_NODE);
if (!new_rn)
return -ENOMEM;
new_rn->rn_start = last + 1;
@@ -234,7 +235,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
right->rn_start = start;
range_it_insert(right, rt);
} else {
- left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+ left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE);
if (!left)
return -ENOMEM;
left->rn_start = start;
--
2.47.3