All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	yonghong.song@linux.dev, song@kernel.org, eddyz87@gmail.com,
	leon.hwang@linux.dev, kernel-patches-bot@fb.com
Subject: [RFC PATCH bpf-next 1/2] bpf: Introduce global percpu data
Date: Mon, 13 Jan 2025 23:24:36 +0800	[thread overview]
Message-ID: <20250113152437.67196-2-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250113152437.67196-1-leon.hwang@linux.dev>

This patch introduces global per-CPU data, inspired by commit
6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
definition of global per-CPU variables in BPF, similar to the
DEFINE_PER_CPU() macro in the kernel[0].

For example, in BPF, it is able to define a global per-CPU variable like
this:

int percpu_data SEC(".data..percpu");

With this patch, tools like retsnoop[1] and bpflbr[2] can simplify their
BPF code for handling LBRs. The code can be updated from

static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");

to

static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".data..percpu.lbrs");

This eliminates the need to retrieve the CPU ID using the
bpf_get_smp_processor_id() helper.

Additionally, by reusing global per-CPU variables, sharing information
between tail callers and callees or freplace callers and callees becomes
simpler compared to using percpu_array maps.

Links:
[0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
[1] https://github.com/anakryiko/retsnoop
[2] https://github.com/Asphaltt/bpflbr

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 kernel/bpf/arraymap.c  |  39 +++++++++++++-
 kernel/bpf/verifier.c  |  45 +++++++++++++++++
 tools/lib/bpf/libbpf.c | 112 ++++++++++++++++++++++++++++++++---------
 3 files changed, 171 insertions(+), 25 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index eb28c0f219ee4..f8c60d8331975 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -249,6 +249,40 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
+static int percpu_array_map_direct_value_addr(const struct bpf_map *map,
+					      u64 *imm, u32 off)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	if (map->max_entries != 1)
+		return -EOPNOTSUPP;
+	if (off >= map->value_size)
+		return -EINVAL;
+	if (!bpf_jit_supports_percpu_insn())
+		return -EOPNOTSUPP;
+
+	*imm = (u64) array->pptrs[0];
+	return 0;
+}
+
+static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
+					      u64 imm, u32 *off)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u64 base = (u64) array->pptrs[0];
+	u64 range = array->elem_size;
+
+	if (map->max_entries != 1)
+		return -EOPNOTSUPP;
+	if (imm < base || imm >= base + range)
+		return -ENOENT;
+	if (!bpf_jit_supports_percpu_insn())
+		return -EOPNOTSUPP;
+
+	*off = imm - base;
+	return 0;
+}
+
 /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
 static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
@@ -534,7 +568,8 @@ static int array_map_check_btf(const struct bpf_map *map,
 
 	/* One exception for keyless BTF: .bss/.data/.rodata map */
 	if (btf_type_is_void(key_type)) {
-		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+		if ((map->map_type != BPF_MAP_TYPE_ARRAY &&
+		     map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) ||
 		    map->max_entries != 1)
 			return -EINVAL;
 
@@ -815,6 +850,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = percpu_array_map_lookup_elem,
 	.map_gen_lookup = percpu_array_map_gen_lookup,
+	.map_direct_value_addr = percpu_array_map_direct_value_addr,
+	.map_direct_value_meta = percpu_array_map_direct_value_meta,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
 	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b8ca227c78af1..94ce02a48ddc1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6809,6 +6809,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
 	u64 addr;
 	int err;
 
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		return -EINVAL;
 	err = map->ops->map_direct_value_addr(map, &addr, off);
 	if (err)
 		return err;
@@ -7324,6 +7326,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			/* if map is read-only, track its contents as scalars */
 			if (tnum_is_const(reg->var_off) &&
 			    bpf_map_is_rdonly(map) &&
+			    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
 			    map->ops->map_direct_value_addr) {
 				int map_off = off + reg->var_off.value;
 				u64 val = 0;
@@ -9140,6 +9143,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
 		return -EACCES;
 	}
 
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		verbose(env, "percpu_array map does not support direct string value access\n");
+		return -EINVAL;
+	}
+
 	err = check_map_access(env, regno, reg->off,
 			       map->value_size - reg->off, false,
 			       ACCESS_HELPER);
@@ -10751,6 +10759,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 		return -EINVAL;
 	num_args = data_len_reg->var_off.value / 8;
 
+	if (fmt_map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		verbose(env, "percpu_array map does not support snprintf\n");
+		return -EINVAL;
+	}
+
 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
 	 * and map_direct_value_addr is set.
 	 */
@@ -21304,6 +21317,38 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}
 
+#ifdef CONFIG_SMP
+		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+		    (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+		     insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
+			struct bpf_map *map;
+
+			aux = &env->insn_aux_data[i + delta];
+			map = env->used_maps[aux->map_index];
+			if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+				goto next_insn;
+
+			/* Reuse the original ld_imm64 insn. And add one
+			 * mov64_percpu_reg insn.
+			 */
+
+			insn_buf[0] = insn[1];
+			insn_buf[1] = BPF_MOV64_PERCPU_REG(insn->dst_reg, insn->dst_reg);
+			cnt = 2;
+
+			i++;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+
+			goto next_insn;
+		}
+#endif
+
 		if (insn->code != (BPF_JMP | BPF_CALL))
 			goto next_insn;
 		if (insn->src_reg == BPF_PSEUDO_CALL)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6c262d0152f81..881174f4f90a4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -516,6 +516,7 @@ struct bpf_struct_ops {
 };
 
 #define DATA_SEC ".data"
+#define PERCPU_DATA_SEC ".data..percpu"
 #define BSS_SEC ".bss"
 #define RODATA_SEC ".rodata"
 #define KCONFIG_SEC ".kconfig"
@@ -562,6 +563,8 @@ struct bpf_map {
 	__u32 btf_value_type_id;
 	__u32 btf_vmlinux_value_type_id;
 	enum libbpf_map_type libbpf_type;
+	int num_cpus;
+	void *data;
 	void *mmaped;
 	struct bpf_struct_ops *st_ops;
 	struct bpf_map *inner_map;
@@ -1923,11 +1926,35 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
 	return false;
 }
 
+static bool map_is_percpu_data(struct bpf_map *map)
+{
+	return str_has_pfx(map->real_name, PERCPU_DATA_SEC);
+}
+
+static void map_copy_data(struct bpf_map *map, const void *data)
+{
+	bool is_percpu_data = map_is_percpu_data(map);
+	size_t data_sz = map->def.value_size;
+	size_t elem_sz = roundup(data_sz, 8);
+	int i;
+
+	if (!data)
+		return;
+
+	if (!is_percpu_data)
+		memcpy(map->mmaped, data, data_sz);
+	else
+		for (i = 0; i < map->num_cpus; i++)
+			memcpy(map->data + i*elem_sz, data, data_sz);
+}
+
 static int
 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 {
+	bool is_percpu_data = str_has_pfx(real_name, PERCPU_DATA_SEC);
 	struct bpf_map_def *def;
+	const char *data_desc;
 	struct bpf_map *map;
 	size_t mmap_sz;
 	int err;
@@ -1948,7 +1975,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	}
 
 	def = &map->def;
-	def->type = BPF_MAP_TYPE_ARRAY;
+	def->type = is_percpu_data ? BPF_MAP_TYPE_PERCPU_ARRAY
+				   : BPF_MAP_TYPE_ARRAY;
 	def->key_size = sizeof(int);
 	def->value_size = data_sz;
 	def->max_entries = 1;
@@ -1958,29 +1986,57 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	/* failures are fine because of maps like .rodata.str1.1 */
 	(void) map_fill_btf_type_info(obj, map);
 
-	if (map_is_mmapable(obj, map))
-		def->map_flags |= BPF_F_MMAPABLE;
+	data_desc = is_percpu_data ? "percpu " : "";
+	pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
+		 map->name, data_desc, map->sec_idx, map->sec_offset,
+		 def->map_flags);
 
-	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
-		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
+	if (is_percpu_data) {
+		map->num_cpus = libbpf_num_possible_cpus();
+		if (map->num_cpus < 0) {
+			err = errno;
+			pr_warn("failed to get possible cpus\n");
+			goto free_name;
+		}
 
-	mmap_sz = bpf_map_mmap_sz(map);
-	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
-			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
-	if (map->mmaped == MAP_FAILED) {
-		err = -errno;
-		map->mmaped = NULL;
-		pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err));
-		zfree(&map->real_name);
-		zfree(&map->name);
-		return err;
-	}
+		map->data = calloc(map->num_cpus, roundup(data_sz, 8));
+		if (!map->data) {
+			err = -ENOMEM;
+			pr_warn("failed to alloc percpu map '%s' content buffer: %s\n",
+				map->name, errstr(err));
+			goto free_name;
+		}
 
-	if (data)
-		memcpy(map->mmaped, data, data_sz);
+		if (data)
+			map_copy_data(map, data);
+		else
+			memset(map->data, 0, map->num_cpus*roundup(data_sz, 8));
+	} else {
+		if (map_is_mmapable(obj, map))
+			def->map_flags |= BPF_F_MMAPABLE;
+
+		mmap_sz = bpf_map_mmap_sz(map);
+		map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+				   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+		if (map->mmaped == MAP_FAILED) {
+			err = -errno;
+			map->mmaped = NULL;
+			pr_warn("failed to alloc map '%s' content buffer: %s\n",
+				map->name, errstr(err));
+			goto free_name;
+		}
+
+		if (data)
+			memcpy(map->mmaped, data, data_sz);
+	}
 
 	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
 	return 0;
+
+free_name:
+	zfree(&map->real_name);
+	zfree(&map->name);
+	return err;
 }
 
 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
@@ -5127,16 +5183,21 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 	enum libbpf_map_type map_type = map->libbpf_type;
 	int err, zero = 0;
 	size_t mmap_sz;
+	size_t data_sz;
+	void *data;
 
+	data_sz = map_is_percpu_data(map) ? roundup(map->def.value_size, 8)*map->num_cpus
+					  : map->def.value_size;
+	data = map_is_percpu_data(map) ? map->data : map->mmaped;
 	if (obj->gen_loader) {
 		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
-					 map->mmaped, map->def.value_size);
+					 data, data_sz);
 		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
 			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
 		return 0;
 	}
 
-	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+	err = bpf_map_update_elem(map->fd, &zero, data, 0);
 	if (err) {
 		err = -errno;
 		pr_warn("map '%s': failed to set initial contents: %s\n",
@@ -9041,6 +9102,8 @@ static void bpf_map__destroy(struct bpf_map *map)
 	if (map->mmaped && map->mmaped != map->obj->arena_data)
 		munmap(map->mmaped, bpf_map_mmap_sz(map));
 	map->mmaped = NULL;
+	if (map->data)
+		zfree(&map->data);
 
 	if (map->st_ops) {
 		zfree(&map->st_ops->data);
@@ -10348,7 +10411,8 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 	if (map->obj->loaded || map->reused)
 		return libbpf_err(-EBUSY);
 
-	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+	if ((!map->mmaped && !map->data) ||
+	    map->libbpf_type == LIBBPF_MAP_KCONFIG)
 		return libbpf_err(-EINVAL);
 
 	if (map->def.type == BPF_MAP_TYPE_ARENA)
@@ -10358,7 +10422,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 	if (size != actual_sz)
 		return libbpf_err(-EINVAL);
 
-	memcpy(map->mmaped, data, size);
+	map_copy_data(map, data);
 	return 0;
 }
 
@@ -10370,7 +10434,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
 		return map->st_ops->data;
 	}
 
-	if (!map->mmaped)
+	if ((!map->mmaped && !map->data))
 		return NULL;
 
 	if (map->def.type == BPF_MAP_TYPE_ARENA)
@@ -10378,7 +10442,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
 	else
 		*psize = map->def.value_size;
 
-	return map->mmaped;
+	return map->def.type == BPF_MAP_TYPE_PERCPU_ARRAY ? map->data : map->mmaped;
 }
 
 bool bpf_map__is_internal(const struct bpf_map *map)
-- 
2.47.1


  reply	other threads:[~2025-01-13 15:25 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-13 15:24 [RFC PATCH bpf-next 0/2] bpf: Introduce global percpu data Leon Hwang
2025-01-13 15:24 ` Leon Hwang [this message]
2025-01-14 23:10   ` [RFC PATCH bpf-next 1/2] " Andrii Nakryiko
2025-01-16  7:22     ` Leon Hwang
2025-01-16 23:37       ` Andrii Nakryiko
2025-01-17  6:24         ` Leon Hwang
2025-01-13 15:24 ` [RFC PATCH bpf-next 2/2] selftests/bpf: Add a case to test " Leon Hwang
2025-01-13 16:58 ` [RFC PATCH bpf-next 0/2] bpf: Introduce " Daniel Xu
2025-01-14  6:35   ` Leon Hwang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250113152437.67196-2-leon.hwang@linux.dev \
    --to=leon.hwang@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=kernel-patches-bot@fb.com \
    --cc=song@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.