BPF List
 help / color / mirror / Atom feed
From: thinker.li@gmail.com
To: bpf@vger.kernel.org, ast@kernel.org, martin.lau@linux.dev,
	song@kernel.org, kernel-team@meta.com, andrii@kernel.org
Cc: sinquersw@gmail.com, kuifeng@meta.com,
	Kui-Feng Lee <thinker.li@gmail.com>
Subject: [PATCH bpf-next 1/2] bpf: struct_ops supports more than one page for trampolines.
Date: Fri, 16 Feb 2024 10:28:27 -0800	[thread overview]
Message-ID: <20240216182828.201727-2-thinker.li@gmail.com> (raw)
In-Reply-To: <20240216182828.201727-1-thinker.li@gmail.com>

From: Kui-Feng Lee <thinker.li@gmail.com>

The BPF struct_ops previously only allowed for one page to be used for the
trampolines of all links in a map. However, we have recently run out of
space due to the large number of BPF program links. By allocating
additional pages when we exhaust an existing page, we can accommodate more
links in a single map.

The variable st_map->image has been changed to st_map->image_pages, and its
type has been changed to an array of pointers to buffers of PAGE_SIZE. The
array is dynamically resized and additional pages are allocated when all
existing pages are exhausted.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 kernel/bpf/bpf_struct_ops.c | 99 ++++++++++++++++++++++++++++++-------
 1 file changed, 80 insertions(+), 19 deletions(-)

diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 0d7be97a2411..bb7ae665006a 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -30,12 +30,11 @@ struct bpf_struct_ops_map {
 	 */
 	struct bpf_link **links;
 	u32 links_cnt;
-	/* image is a page that has all the trampolines
+	u32 image_pages_cnt;
+	/* image_pages is an array of pages that has all the trampolines
 	 * that stores the func args before calling the bpf_prog.
-	 * A PAGE_SIZE "image" is enough to store all trampoline for
-	 * "links[]".
 	 */
-	void *image;
+	void **image_pages;
 	/* The owner moduler's btf. */
 	struct btf *btf;
 	/* uvalue->data stores the kernel struct
@@ -535,7 +534,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	void *udata, *kdata;
 	int prog_fd, err;
 	void *image, *image_end;
-	u32 i;
+	void **image_pages;
+	u32 i, next_page = 0;
 
 	if (flags)
 		return -EINVAL;
@@ -573,8 +573,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
 	udata = &uvalue->data;
 	kdata = &kvalue->data;
-	image = st_map->image;
-	image_end = st_map->image + PAGE_SIZE;
+	image = st_map->image_pages[next_page++];
+	image_end = image + PAGE_SIZE;
 
 	module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
 	for_each_member(i, t, member) {
@@ -657,6 +657,43 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 							&st_ops->func_models[i],
 							*(void **)(st_ops->cfi_stubs + moff),
 							image, image_end);
+		if (err == -E2BIG) {
+			/* Use an additional page to try again.
+			 *
+			 * It may reuse pages allocated for the previous
+			 * failed calls.
+			 */
+			if (next_page >= st_map->image_pages_cnt) {
+				/* Allocate an additional page */
+				image_pages = krealloc(st_map->image_pages,
+						       (st_map->image_pages_cnt + 1) * sizeof(void *),
+						       GFP_KERNEL);
+				if (!image_pages) {
+					err = -ENOMEM;
+					goto reset_unlock;
+				}
+				st_map->image_pages = image_pages;
+
+				err = bpf_jit_charge_modmem(PAGE_SIZE);
+				if (err)
+					goto reset_unlock;
+
+				image = arch_alloc_bpf_trampoline(PAGE_SIZE);
+				if (!image) {
+					bpf_jit_uncharge_modmem(PAGE_SIZE);
+					err = -ENOMEM;
+					goto reset_unlock;
+				}
+				st_map->image_pages[st_map->image_pages_cnt++] = image;
+			}
+			image = st_map->image_pages[next_page++];
+			image_end = image + PAGE_SIZE;
+
+			err = bpf_struct_ops_prepare_trampoline(tlinks, link,
+								&st_ops->func_models[i],
+								*(void **)(st_ops->cfi_stubs + moff),
+								image, image_end);
+		}
 		if (err < 0)
 			goto reset_unlock;
 
@@ -667,6 +704,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
+	while (next_page < st_map->image_pages_cnt) {
+		/* Free unused pages
+		 *
+		 * The value can not be updated anymore if the value is not
+		 * rejected by st_ops->validate() or st_ops->reg().  So,
+		 * there is no reason to keep the unused pages.
+		 */
+		bpf_jit_uncharge_modmem(PAGE_SIZE);
+		image = st_map->image_pages[--st_map->image_pages_cnt];
+		arch_free_bpf_trampoline(image, PAGE_SIZE);
+	}
+
 	if (st_map->map.map_flags & BPF_F_LINK) {
 		err = 0;
 		if (st_ops->validate) {
@@ -674,7 +723,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 			if (err)
 				goto reset_unlock;
 		}
-		arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
+		for (i = 0; i < next_page; i++)
+			arch_protect_bpf_trampoline(st_map->image_pages[i],
+						    PAGE_SIZE);
 		/* Let bpf_link handle registration & unregistration.
 		 *
 		 * Pair with smp_load_acquire() during lookup_elem().
@@ -683,7 +734,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		goto unlock;
 	}
 
-	arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
+	for (i = 0; i < next_page; i++)
+		arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
 	err = st_ops->reg(kdata);
 	if (likely(!err)) {
 		/* This refcnt increment on the map here after
@@ -706,7 +758,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	 * there was a race in registering the struct_ops (under the same name) to
 	 * a sub-system through different struct_ops's maps.
 	 */
-	arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
+	for (i = 0; i < next_page; i++)
+		arch_unprotect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
 
 reset_unlock:
 	bpf_struct_ops_map_put_progs(st_map);
@@ -771,14 +824,15 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
 static void __bpf_struct_ops_map_free(struct bpf_map *map)
 {
 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+	int i;
 
 	if (st_map->links)
 		bpf_struct_ops_map_put_progs(st_map);
 	bpf_map_area_free(st_map->links);
-	if (st_map->image) {
-		arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
-		bpf_jit_uncharge_modmem(PAGE_SIZE);
-	}
+	for (i = 0; i < st_map->image_pages_cnt; i++)
+		arch_free_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
+	bpf_jit_uncharge_modmem(PAGE_SIZE * st_map->image_pages_cnt);
+	kfree(st_map->image_pages);
 	bpf_map_area_free(st_map->uvalue);
 	bpf_map_area_free(st_map);
 }
@@ -888,20 +942,27 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	st_map->st_ops_desc = st_ops_desc;
 	map = &st_map->map;
 
+	st_map->image_pages = kcalloc(1, sizeof(void *), GFP_KERNEL);
+	if (!st_map->image_pages) {
+		ret = -ENOMEM;
+		goto errout_free;
+	}
+
 	ret = bpf_jit_charge_modmem(PAGE_SIZE);
 	if (ret)
 		goto errout_free;
 
-	st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
-	if (!st_map->image) {
-		/* __bpf_struct_ops_map_free() uses st_map->image as flag
-		 * for "charged or not". In this case, we need to unchange
-		 * here.
+	st_map->image_pages[0] = arch_alloc_bpf_trampoline(PAGE_SIZE);
+	if (!st_map->image_pages[0]) {
+		/* __bpf_struct_ops_map_free() uses st_map->image_pages_cnt
+		 * to for uncharging a number of pages.  In this case, we
+		 * need to uncharge here.
 		 */
 		bpf_jit_uncharge_modmem(PAGE_SIZE);
 		ret = -ENOMEM;
 		goto errout_free;
 	}
+	st_map->image_pages_cnt = 1;
 	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
 	st_map->links_cnt = btf_type_vlen(t);
 	st_map->links =
-- 
2.34.1


  reply	other threads:[~2024-02-16 18:28 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-16 18:28 [PATCH bpf-next 0/2] Allow struct_ops maps with a large number of programs thinker.li
2024-02-16 18:28 ` thinker.li [this message]
2024-02-20 21:47   ` [PATCH bpf-next 1/2] bpf: struct_ops supports more than one page for trampolines Martin KaFai Lau
2024-02-20 23:23     ` Kui-Feng Lee
2024-02-16 18:28 ` [PATCH bpf-next 2/2] selftests/bpf: Test struct_ops maps with a large number of program links thinker.li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240216182828.201727-2-thinker.li@gmail.com \
    --to=thinker.li@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kuifeng@meta.com \
    --cc=martin.lau@linux.dev \
    --cc=sinquersw@gmail.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox