public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, andrii@kernel.org, daniel@iogearbox.net,
	yonghong.song@linux.dev, song@kernel.org, eddyz87@gmail.com,
	qmo@kernel.org, dxu@dxuuu.xyz, leon.hwang@linux.dev,
	kernel-patches-bot@fb.com
Subject: [PATCH bpf-next v3 2/4] bpf, libbpf: Support global percpu data
Date: Tue, 27 May 2025 00:21:44 +0800	[thread overview]
Message-ID: <20250526162146.24429-3-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250526162146.24429-1-leon.hwang@linux.dev>

This patch introduces support for global percpu data in libbpf by adding a
new ".data..percpu" section, similar to ".data". It enables efficient
handling of percpu global variables in bpf programs.

This enhancement improves performance for workloads that benefit from
percpu storage.

Also add the bpf_map__is_internal_percpu() API, which checks whether a map
is an internal map used for global percpu variables.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/lib/bpf/libbpf.c   | 102 +++++++++++++++++++++++++++++++--------
 tools/lib/bpf/libbpf.h   |   9 ++++
 tools/lib/bpf/libbpf.map |   1 +
 3 files changed, 91 insertions(+), 21 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e9c641a2fb203..65f0df09ac6d8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -518,6 +518,7 @@ struct bpf_struct_ops {
 };
 
 #define DATA_SEC ".data"
+#define PERCPU_DATA_SEC ".data..percpu"
 #define BSS_SEC ".bss"
 #define RODATA_SEC ".rodata"
 #define KCONFIG_SEC ".kconfig"
@@ -532,6 +533,7 @@ enum libbpf_map_type {
 	LIBBPF_MAP_BSS,
 	LIBBPF_MAP_RODATA,
 	LIBBPF_MAP_KCONFIG,
+	LIBBPF_MAP_PERCPU_DATA,
 };
 
 struct bpf_map_def {
@@ -642,6 +644,7 @@ enum sec_type {
 	SEC_DATA,
 	SEC_RODATA,
 	SEC_ST_OPS,
+	SEC_PERCPU_DATA,
 };
 
 struct elf_sec_desc {
@@ -1902,7 +1905,7 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
 	struct btf_var_secinfo *vsi;
 	int i, n;
 
-	if (!map->btf_value_type_id)
+	if (!map->btf_value_type_id || map->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
 		return false;
 
 	t = btf__type_by_id(obj->btf, map->btf_value_type_id);
@@ -1926,6 +1929,7 @@ static int
 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 {
+	bool is_percpu = type == LIBBPF_MAP_PERCPU_DATA;
 	struct bpf_map_def *def;
 	struct bpf_map *map;
 	size_t mmap_sz;
@@ -1947,9 +1951,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	}
 
 	def = &map->def;
-	def->type = BPF_MAP_TYPE_ARRAY;
+	def->type = is_percpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY;
 	def->key_size = sizeof(int);
-	def->value_size = data_sz;
+	def->value_size = is_percpu ? roundup(data_sz, 8) : data_sz;
 	def->max_entries = 1;
 	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
 		? BPF_F_RDONLY_PROG : 0;
@@ -1960,10 +1964,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	if (map_is_mmapable(obj, map))
 		def->map_flags |= BPF_F_MMAPABLE;
 
-	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
-		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
+	pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
+		 map->name, is_percpu ? "percpu " : "", map->sec_idx,
+		 map->sec_offset, def->map_flags);
 
-	mmap_sz = bpf_map_mmap_sz(map);
+	mmap_sz = is_percpu ? def->value_size : bpf_map_mmap_sz(map);
 	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
 			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (map->mmaped == MAP_FAILED) {
@@ -1999,6 +2004,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 			continue;
 
 		switch (sec_desc->sec_type) {
+		case SEC_PERCPU_DATA:
+			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU_DATA,
+							    sec_name, sec_idx,
+							    sec_desc->data->d_buf,
+							    sec_desc->data->d_size);
+			break;
 		case SEC_DATA:
 			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
@@ -3363,6 +3375,10 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 		fixup_offsets = true;
 	}
 
+	/* Round the .data..percpu DATASEC size up to a multiple of 8. */
+	if (strcmp(sec_name, PERCPU_DATA_SEC) == 0 || str_has_pfx(sec_name, PERCPU_DATA_SEC))
+		t->size = roundup(t->size, 8);
+
 	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
 		const struct btf_type *t_var;
 		struct btf_var *var;
@@ -3923,6 +3939,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 				err = bpf_object__add_programs(obj, data, name, idx);
 				if (err)
 					return err;
+			} else if (strcmp(name, PERCPU_DATA_SEC) == 0 ||
+				   str_has_pfx(name, PERCPU_DATA_SEC)) {
+				sec_desc->sec_type = SEC_PERCPU_DATA;
+				sec_desc->shdr = sh;
+				sec_desc->data = data;
 			} else if (strcmp(name, DATA_SEC) == 0 ||
 				   str_has_pfx(name, DATA_SEC ".")) {
 				sec_desc->sec_type = SEC_DATA;
@@ -4452,6 +4473,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 	case SEC_BSS:
 	case SEC_DATA:
 	case SEC_RODATA:
+	case SEC_PERCPU_DATA:
 		return true;
 	default:
 		return false;
@@ -4477,6 +4499,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
 		return LIBBPF_MAP_DATA;
 	case SEC_RODATA:
 		return LIBBPF_MAP_RODATA;
+	case SEC_PERCPU_DATA:
+		return LIBBPF_MAP_PERCPU_DATA;
 	default:
 		return LIBBPF_MAP_UNSPEC;
 	}
@@ -4794,7 +4818,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
 
 	/*
 	 * LLVM annotates global data differently in BTF, that is,
-	 * only as '.data', '.bss' or '.rodata'.
+	 * only as '.data', '.bss', '.rodata' or '.data..percpu'.
 	 */
 	if (!bpf_map__is_internal(map))
 		return -ENOENT;
@@ -5129,23 +5153,47 @@ static int
 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 {
 	enum libbpf_map_type map_type = map->libbpf_type;
-	int err, zero = 0;
-	size_t mmap_sz;
+	bool is_percpu = map_type == LIBBPF_MAP_PERCPU_DATA;
+	int err = 0, zero = 0, num_cpus, i;
+	size_t data_sz, elem_sz, mmap_sz;
+	void *data = NULL;
+
+	data_sz = map->def.value_size;
+	if (is_percpu) {
+		num_cpus = libbpf_num_possible_cpus();
+		if (num_cpus < 0) {
+			err = num_cpus;
+			return err;
+		}
+
+		data_sz = data_sz * num_cpus;
+		data = malloc(data_sz);
+		if (!data) {
+			err = -ENOMEM;
+			return err;
+		}
+
+		elem_sz = map->def.value_size;
+		for (i = 0; i < num_cpus; i++)
+			memcpy(data + i * elem_sz, map->mmaped, elem_sz);
+	} else {
+		data = map->mmaped;
+	}
 
 	if (obj->gen_loader) {
 		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
-					 map->mmaped, map->def.value_size);
+					 data, data_sz);
 		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
 			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
-		return 0;
+		goto free_data;
 	}
 
-	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+	err = bpf_map_update_elem(map->fd, &zero, data, 0);
 	if (err) {
 		err = -errno;
 		pr_warn("map '%s': failed to set initial contents: %s\n",
 			bpf_map__name(map), errstr(err));
-		return err;
+		goto free_data;
 	}
 
 	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
@@ -5155,7 +5203,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 			err = -errno;
 			pr_warn("map '%s': failed to freeze as read-only: %s\n",
 				bpf_map__name(map), errstr(err));
-			return err;
+			goto free_data;
 		}
 	}
 
@@ -5182,7 +5230,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 			err = -errno;
 			pr_warn("map '%s': failed to re-mmap() contents: %s\n",
 				bpf_map__name(map), errstr(err));
-			return err;
+			goto free_data;
 		}
 		map->mmaped = mmaped;
 	} else if (map->mmaped) {
@@ -5190,7 +5238,10 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 		map->mmaped = NULL;
 	}
 
-	return 0;
+free_data:
+	if (is_percpu)
+		free(data);
+	return err;
 }
 
 static void bpf_map__destroy(struct bpf_map *map);
@@ -10214,16 +10265,20 @@ int bpf_map__fd(const struct bpf_map *map)
 
 static bool map_uses_real_name(const struct bpf_map *map)
 {
-	/* Since libbpf started to support custom .data.* and .rodata.* maps,
-	 * their user-visible name differs from kernel-visible name. Users see
-	 * such map's corresponding ELF section name as a map name.
-	 * This check distinguishes .data/.rodata from .data.* and .rodata.*
-	 * maps to know which name has to be returned to the user.
+	/* Since libbpf started to support custom .data.*, .rodata.* and
+	 * .data..percpu.* maps, their user-visible name differs from
+	 * kernel-visible name. Users see such map's corresponding ELF section
+	 * name as a map name. This check distinguishes
+	 * .data/.rodata/.data..percpu from .data.*, .rodata.* and
+	 * .data..percpu.* maps to know which name has to be returned to the
+	 * user.
 	 */
 	if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
 		return true;
 	if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
 		return true;
+	if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA && strcmp(map->real_name, PERCPU_DATA_SEC) != 0)
+		return true;
 	return false;
 }
 
@@ -10468,6 +10523,11 @@ bool bpf_map__is_internal(const struct bpf_map *map)
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 }
 
+bool bpf_map__is_internal_percpu(const struct bpf_map *map)
+{
+	return map->libbpf_type == LIBBPF_MAP_PERCPU_DATA;
+}
+
 __u32 bpf_map__ifindex(const struct bpf_map *map)
 {
 	return map->map_ifindex;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index d39f19c8396dc..db5468f78b090 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1091,6 +1091,15 @@ LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize
  */
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 
+/**
+ * @brief **bpf_map__is_internal_percpu()** tells the caller whether or not
+ * the passed map is an internal map used for global percpu variables.
+ * @param map the bpf_map
+ * @return true, if the map is an internal map used for global percpu
+ * variables; false, otherwise
+ */
+LIBBPF_API bool bpf_map__is_internal_percpu(const struct bpf_map *map);
+
 /**
  * @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the
  * BPF map should be pinned. This does not actually create the 'pin'.
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 1205f9a4fe048..1c239ac88c699 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -443,4 +443,5 @@ LIBBPF_1.6.0 {
 		bpf_program__line_info_cnt;
 		btf__add_decl_attr;
 		btf__add_type_attr;
+		bpf_map__is_internal_percpu;
 } LIBBPF_1.5.0;
-- 
2.49.0


  parent reply	other threads:[~2025-05-26 16:22 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-26 16:21 [PATCH bpf-next v3 0/4] bpf: Introduce global percpu data Leon Hwang
2025-05-26 16:21 ` [PATCH bpf-next v3 1/4] " Leon Hwang
2025-05-27 22:31   ` Andrii Nakryiko
2025-05-29  2:03     ` Leon Hwang
2025-05-26 16:21 ` Leon Hwang [this message]
2025-05-27 22:31   ` [PATCH bpf-next v3 2/4] bpf, libbpf: Support " Andrii Nakryiko
2025-05-29  2:24     ` Leon Hwang
2025-05-27 22:40   ` Alexei Starovoitov
2025-05-27 23:25     ` Andrii Nakryiko
2025-05-28  2:35       ` Alexei Starovoitov
2025-05-28 16:05         ` Andrii Nakryiko
2025-05-29  2:43           ` Leon Hwang
2025-06-02 23:50             ` Andrii Nakryiko
2025-06-03  2:45               ` Leon Hwang
2025-06-05 16:29                 ` Andrii Nakryiko
2025-05-26 16:21 ` [PATCH bpf-next v3 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
2025-05-27 22:31   ` Andrii Nakryiko
2025-05-29  2:56     ` Leon Hwang
2025-06-02 23:50       ` Andrii Nakryiko
2025-06-03  2:47         ` Leon Hwang
2025-05-26 16:21 ` [PATCH bpf-next v3 4/4] selftests/bpf: Add cases to test " Leon Hwang
2025-05-27 22:31 ` [PATCH bpf-next v3 0/4] bpf: Introduce " Andrii Nakryiko
2025-05-28 17:10   ` Yonghong Song
2025-05-29  1:59     ` Leon Hwang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250526162146.24429-3-leon.hwang@linux.dev \
    --to=leon.hwang@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dxu@dxuuu.xyz \
    --cc=eddyz87@gmail.com \
    --cc=kernel-patches-bot@fb.com \
    --cc=qmo@kernel.org \
    --cc=song@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox