From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
yonghong.song@linux.dev, song@kernel.org, eddyz87@gmail.com,
qmo@kernel.org, dxu@dxuuu.xyz, leon.hwang@linux.dev,
kernel-patches-bot@fb.com
Subject: [PATCH bpf-next 2/4] bpf, libbpf: Support global percpu data
Date: Tue, 28 Jan 2025 00:21:56 +0800 [thread overview]
Message-ID: <20250127162158.84906-3-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250127162158.84906-1-leon.hwang@linux.dev>
This patch introduces support for global percpu data in libbpf. A new
section named ".percpu" is added, similar to the existing ".data" section.
Internal maps are created for ".percpu" sections, which are then
initialized and populated accordingly.
The changes include:
* Introduction of the ".percpu" section in libbpf.
* Creation of internal maps for percpu data.
* Initialization and population of these maps.
This enhancement allows BPF programs to efficiently manage and access
global percpu data, improving performance for use cases that require
a percpu buffer.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/lib/bpf/libbpf.c | 172 ++++++++++++++++++++++++++++++++---------
1 file changed, 135 insertions(+), 37 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da51725..6da6004c5c84d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -516,6 +516,7 @@ struct bpf_struct_ops {
};
#define DATA_SEC ".data"
+#define PERCPU_DATA_SEC ".percpu"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
@@ -530,6 +531,7 @@ enum libbpf_map_type {
LIBBPF_MAP_BSS,
LIBBPF_MAP_RODATA,
LIBBPF_MAP_KCONFIG,
+ LIBBPF_MAP_PERCPU_DATA,
};
struct bpf_map_def {
@@ -562,6 +564,7 @@ struct bpf_map {
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
enum libbpf_map_type libbpf_type;
+ void *data;
void *mmaped;
struct bpf_struct_ops *st_ops;
struct bpf_map *inner_map;
@@ -640,6 +643,7 @@ enum sec_type {
SEC_DATA,
SEC_RODATA,
SEC_ST_OPS,
+ SEC_PERCPU_DATA,
};
struct elf_sec_desc {
@@ -1923,13 +1927,24 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
return false;
}
+static void map_copy_data(struct bpf_map *map, const void *data)
+{
+ bool is_percpu_data = map->libbpf_type == LIBBPF_MAP_PERCPU_DATA;
+ size_t data_sz = map->def.value_size;
+
+ if (data)
+ memcpy(is_percpu_data ? map->data : map->mmaped, data, data_sz);
+}
+
static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
const char *real_name, int sec_idx, void *data, size_t data_sz)
{
+ bool is_percpu_data = type == LIBBPF_MAP_PERCPU_DATA;
struct bpf_map_def *def;
struct bpf_map *map;
size_t mmap_sz;
+ size_t elem_sz;
int err;
map = bpf_object__add_map(obj);
@@ -1948,7 +1963,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
}
def = &map->def;
- def->type = BPF_MAP_TYPE_ARRAY;
+ def->type = is_percpu_data ? BPF_MAP_TYPE_PERCPU_ARRAY
+ : BPF_MAP_TYPE_ARRAY;
def->key_size = sizeof(int);
def->value_size = data_sz;
def->max_entries = 1;
@@ -1958,29 +1974,53 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
/* failures are fine because of maps like .rodata.str1.1 */
(void) map_fill_btf_type_info(obj, map);
- if (map_is_mmapable(obj, map))
- def->map_flags |= BPF_F_MMAPABLE;
+ pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
+ map->name, is_percpu_data ? "percpu " : "", map->sec_idx,
+ map->sec_offset, def->map_flags);
- pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
- map->name, map->sec_idx, map->sec_offset, def->map_flags);
+ if (is_percpu_data) {
+ elem_sz = roundup(data_sz, 8);
- mmap_sz = bpf_map_mmap_sz(map);
- map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- if (map->mmaped == MAP_FAILED) {
- err = -errno;
- map->mmaped = NULL;
- pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err));
- zfree(&map->real_name);
- zfree(&map->name);
- return err;
- }
+ map->data = malloc(elem_sz);
+ if (!map->data) {
+ err = -ENOMEM;
+ pr_warn("map '%s': failed to alloc content buffer: %s\n",
+ map->name, errstr(err));
+ goto free_name;
+ }
- if (data)
- memcpy(map->mmaped, data, data_sz);
+ if (data) {
+ memcpy(map->data, data, data_sz);
+ if (data_sz != elem_sz)
+ memset(map->data + data_sz, 0, elem_sz - data_sz);
+ } else {
+ memset(map->data, 0, elem_sz);
+ }
+ } else {
+ if (map_is_mmapable(obj, map))
+ def->map_flags |= BPF_F_MMAPABLE;
+
+ mmap_sz = bpf_map_mmap_sz(map);
+ map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("map '%s': failed to alloc content buffer: %s\n",
+ map->name, errstr(err));
+ goto free_name;
+ }
+
+ map_copy_data(map, data);
+ }
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
return 0;
+
+free_name:
+ zfree(&map->real_name);
+ zfree(&map->name);
+ return err;
}
static int bpf_object__init_global_data_maps(struct bpf_object *obj)
@@ -2015,6 +2055,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
sec_desc->data->d_buf,
sec_desc->data->d_size);
break;
+ case SEC_PERCPU_DATA:
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU_DATA,
+ sec_name, sec_idx,
+ sec_desc->data->d_buf,
+ sec_desc->data->d_size);
+ break;
case SEC_BSS:
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
@@ -3934,6 +3981,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RODATA;
sec_desc->shdr = sh;
sec_desc->data = data;
+ } else if (strcmp(name, PERCPU_DATA_SEC) == 0 ||
+ str_has_pfx(name, PERCPU_DATA_SEC)) {
+ sec_desc->sec_type = SEC_PERCPU_DATA;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
@@ -4453,6 +4505,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
case SEC_BSS:
case SEC_DATA:
case SEC_RODATA:
+ case SEC_PERCPU_DATA:
return true;
default:
return false;
@@ -4478,6 +4531,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
return LIBBPF_MAP_DATA;
case SEC_RODATA:
return LIBBPF_MAP_RODATA;
+ case SEC_PERCPU_DATA:
+ return LIBBPF_MAP_PERCPU_DATA;
default:
return LIBBPF_MAP_UNSPEC;
}
@@ -4795,7 +4850,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
/*
* LLVM annotates global data differently in BTF, that is,
- * only as '.data', '.bss' or '.rodata'.
+ * only as '.data', '.bss', '.percpu' or '.rodata'.
*/
if (!bpf_map__is_internal(map))
return -ENOENT;
@@ -5125,23 +5180,54 @@ static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
enum libbpf_map_type map_type = map->libbpf_type;
- int err, zero = 0;
+ bool is_percpu_data = map_type == LIBBPF_MAP_PERCPU_DATA;
+ int err = 0, zero = 0;
+ void *data = NULL;
+ int num_cpus, i;
+ size_t data_sz;
+ size_t elem_sz;
size_t mmap_sz;
+ data_sz = map->def.value_size;
+ if (is_percpu_data) {
+ num_cpus = libbpf_num_possible_cpus();
+ if (num_cpus < 0) {
+ err = libbpf_err_errno(num_cpus);
+ pr_warn("map '%s': failed to get num_cpus: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
+
+ elem_sz = roundup(data_sz, 8);
+ data_sz = elem_sz * num_cpus;
+ data = malloc(data_sz);
+ if (!data) {
+ err = -ENOMEM;
+ pr_warn("map '%s': failed to malloc memory: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
+
+ for (i = 0; i < num_cpus; i++)
+ memcpy(data + i * elem_sz, map->data, elem_sz);
+ } else {
+ data = map->mmaped;
+ }
+
if (obj->gen_loader) {
bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
- map->mmaped, map->def.value_size);
+ data, data_sz);
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
- return 0;
+ goto free_data;
}
- err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+ err = bpf_map_update_elem(map->fd, &zero, data, 0);
if (err) {
err = -errno;
pr_warn("map '%s': failed to set initial contents: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ goto free_data;
}
/* Freeze .rodata and .kconfig map as read-only from syscall side. */
@@ -5151,7 +5237,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
err = -errno;
pr_warn("map '%s': failed to freeze as read-only: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ goto free_data;
}
}
@@ -5178,7 +5264,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
err = -errno;
pr_warn("map '%s': failed to re-mmap() contents: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ goto free_data;
}
map->mmaped = mmaped;
} else if (map->mmaped) {
@@ -5186,7 +5272,10 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
map->mmaped = NULL;
}
- return 0;
+free_data:
+ if (is_percpu_data)
+ free(data);
+ return err;
}
static void bpf_map__destroy(struct bpf_map *map);
@@ -8120,7 +8209,9 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
struct bpf_map *m;
bpf_object__for_each_map(m, obj) {
- if (!bpf_map__is_internal(m))
+ if (!bpf_map__is_internal(m) ||
+ /* percpu data map is internal and not-mmapable. */
+ m->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
continue;
if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
m->def.map_flags &= ~BPF_F_MMAPABLE;
@@ -9041,6 +9132,8 @@ static void bpf_map__destroy(struct bpf_map *map)
if (map->mmaped && map->mmaped != map->obj->arena_data)
munmap(map->mmaped, bpf_map_mmap_sz(map));
map->mmaped = NULL;
+ if (map->data)
+ zfree(&map->data);
if (map->st_ops) {
zfree(&map->st_ops->data);
@@ -10132,14 +10225,18 @@ int bpf_map__fd(const struct bpf_map *map)
static bool map_uses_real_name(const struct bpf_map *map)
{
- /* Since libbpf started to support custom .data.* and .rodata.* maps,
- * their user-visible name differs from kernel-visible name. Users see
- * such map's corresponding ELF section name as a map name.
- * This check distinguishes .data/.rodata from .data.* and .rodata.*
- * maps to know which name has to be returned to the user.
+ /* Since libbpf started to support custom .data.*, .percpu.* and
+ * .rodata.* maps, their user-visible name differs from kernel-visible
+ * name. Users see such map's corresponding ELF section name as a map
+ * name. This check distinguishes .data/.percpu/.rodata from .data.*,
+ * .percpu.* and .rodata.* maps to know which name has to be returned to
+ * the user.
*/
if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
return true;
+ if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA &&
+ strcmp(map->real_name, PERCPU_DATA_SEC) != 0)
+ return true;
if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
return true;
return false;
@@ -10348,7 +10445,8 @@ int bpf_map__set_initial_value(struct bpf_map *map,
if (map->obj->loaded || map->reused)
return libbpf_err(-EBUSY);
- if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+ if ((!map->mmaped && !map->data) ||
+ map->libbpf_type == LIBBPF_MAP_KCONFIG)
return libbpf_err(-EINVAL);
if (map->def.type == BPF_MAP_TYPE_ARENA)
@@ -10358,7 +10456,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
if (size != actual_sz)
return libbpf_err(-EINVAL);
- memcpy(map->mmaped, data, size);
+ map_copy_data(map, data);
return 0;
}
@@ -10370,7 +10468,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
return map->st_ops->data;
}
- if (!map->mmaped)
+ if (!map->mmaped && !map->data)
return NULL;
if (map->def.type == BPF_MAP_TYPE_ARENA)
@@ -10378,7 +10476,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
else
*psize = map->def.value_size;
- return map->mmaped;
+ return map->libbpf_type == LIBBPF_MAP_PERCPU_DATA ? map->data : map->mmaped;
}
bool bpf_map__is_internal(const struct bpf_map *map)
--
2.47.1
next prev parent reply other threads:[~2025-01-27 16:22 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 1/4] " Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 9:42 ` Leon Hwang
2025-02-08 0:23 ` Alexei Starovoitov
2025-02-10 9:35 ` Leon Hwang
2025-01-27 16:21 ` Leon Hwang [this message]
2025-02-06 0:09 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Andrii Nakryiko
2025-02-07 9:48 ` Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 9:52 ` Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 4/4] selftests/bpf: Add a case to test " Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 10:00 ` Leon Hwang
2025-02-07 19:48 ` Andrii Nakryiko
2025-02-10 9:52 ` Leon Hwang
2025-02-11 0:15 ` Andrii Nakryiko
2025-02-12 1:50 ` Leon Hwang
-- strict thread matches above, loose matches on Subject: below --
2025-02-13 16:06 [PATCH bpf-next 0/4] bpf: Introduce " Leon Hwang
2025-02-13 16:06 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250127162158.84906-3-leon.hwang@linux.dev \
--to=leon.hwang@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=dxu@dxuuu.xyz \
--cc=eddyz87@gmail.com \
--cc=kernel-patches-bot@fb.com \
--cc=qmo@kernel.org \
--cc=song@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.