* [PATCH bpf-next 1/4] bpf: Introduce global percpu data
2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
@ 2025-01-27 16:21 ` Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
2025-02-08 0:23 ` Alexei Starovoitov
2025-01-27 16:21 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
` (2 subsequent siblings)
3 siblings, 2 replies; 20+ messages in thread
From: Leon Hwang @ 2025-01-27 16:21 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
This patch introduces global percpu data, inspired by commit
6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
definition of global percpu variables in BPF, similar to the
DEFINE_PER_CPU() macro in the kernel[0].
For example, in BPF, a global percpu variable can be defined like
this:
int percpu_data SEC(".percpu");
With this patch, tools like retsnoop[1] and bpflbr[2] can simplify their
BPF code for handling LBRs. The code can be updated from
static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");
to
static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".percpu.lbrs");
This eliminates the need to retrieve the CPU ID using the
bpf_get_smp_processor_id() helper.
Additionally, by reusing the global percpu data map, sharing information
between tail callers and callees, or between freplace callers and callees,
becomes simpler than with explicitly defined percpu_array maps.
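For illustration, a minimal BPF-side sketch of the difference (the map,
variable and program names below are made up and not part of this patch):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* old style: an explicit percpu_array map */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

/* new style: a global percpu variable */
__u64 counter SEC(".percpu");

SEC("tracepoint/sched/sched_switch")
int handle(void *ctx)
{
	__u32 key = 0;
	__u64 *val;

	/* old: one helper call plus a NULL check */
	val = bpf_map_lookup_elem(&counters, &key);
	if (val)
		(*val)++;

	/* new: plain memory access, already CPU-local */
	counter++;
	return 0;
}

char LICENSE[] SEC("license") = "GPL";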
Links:
[0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
[1] https://github.com/anakryiko/retsnoop
[2] https://github.com/Asphaltt/bpflbr
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
kernel/bpf/arraymap.c | 39 ++++++++++++++++++++++++++++++++++++-
kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index eb28c0f219ee4..f8c60d8331975 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -249,6 +249,40 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
+static int percpu_array_map_direct_value_addr(const struct bpf_map *map,
+ u64 *imm, u32 off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ if (map->max_entries != 1)
+ return -EOPNOTSUPP;
+ if (off >= map->value_size)
+ return -EINVAL;
+ if (!bpf_jit_supports_percpu_insn())
+ return -EOPNOTSUPP;
+
+ *imm = (u64) array->pptrs[0];
+ return 0;
+}
+
+static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
+ u64 imm, u32 *off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u64 base = (u64) array->pptrs[0];
+ u64 range = array->elem_size;
+
+ if (map->max_entries != 1)
+ return -EOPNOTSUPP;
+ if (imm < base || imm >= base + range)
+ return -ENOENT;
+ if (!bpf_jit_supports_percpu_insn())
+ return -EOPNOTSUPP;
+
+ *off = imm - base;
+ return 0;
+}
+
/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
@@ -534,7 +568,8 @@ static int array_map_check_btf(const struct bpf_map *map,
/* One exception for keyless BTF: .bss/.data/.rodata map */
if (btf_type_is_void(key_type)) {
- if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+ if ((map->map_type != BPF_MAP_TYPE_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) ||
map->max_entries != 1)
return -EINVAL;
@@ -815,6 +850,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = percpu_array_map_lookup_elem,
.map_gen_lookup = percpu_array_map_gen_lookup,
+ .map_direct_value_addr = percpu_array_map_direct_value_addr,
+ .map_direct_value_meta = percpu_array_map_direct_value_meta,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9971c03adfd5d..9d99497c2b94c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6810,6 +6810,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
u64 addr;
int err;
+ if (map->map_type != BPF_MAP_TYPE_ARRAY)
+ return -EINVAL;
err = map->ops->map_direct_value_addr(map, &addr, off);
if (err)
return err;
@@ -7322,6 +7324,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
/* if map is read-only, track its contents as scalars */
if (tnum_is_const(reg->var_off) &&
bpf_map_is_rdonly(map) &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
map->ops->map_direct_value_addr) {
int map_off = off + reg->var_off.value;
u64 val = 0;
@@ -9128,6 +9131,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
return -EACCES;
}
+ if (map->map_type != BPF_MAP_TYPE_ARRAY) {
+ verbose(env, "only array map supports direct string value access\n");
+ return -EINVAL;
+ }
+
err = check_map_access(env, regno, reg->off,
map->value_size - reg->off, false,
ACCESS_HELPER);
@@ -10802,6 +10810,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
return -EINVAL;
num_args = data_len_reg->var_off.value / 8;
+ if (fmt_map->map_type != BPF_MAP_TYPE_ARRAY) {
+ verbose(env, "only array map supports snprintf\n");
+ return -EINVAL;
+ }
+
/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
* and map_direct_value_addr is set.
*/
@@ -21381,6 +21394,38 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}
+#ifdef CONFIG_SMP
+ if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
+ struct bpf_map *map;
+
+ aux = &env->insn_aux_data[i + delta];
+ map = env->used_maps[aux->map_index];
+ if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+ goto next_insn;
+
+ /* Reuse the original ld_imm64 insn. And add one
+ * mov64_percpu_reg insn.
+ */
+
+ insn_buf[0] = insn[1];
+ insn_buf[1] = BPF_MOV64_PERCPU_REG(insn->dst_reg, insn->dst_reg);
+ cnt = 2;
+
+ i++;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+
+ goto next_insn;
+ }
+#endif
+
if (insn->code != (BPF_JMP | BPF_CALL))
goto next_insn;
if (insn->src_reg == BPF_PSEUDO_CALL)
--
2.47.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: Introduce global percpu data
2025-01-27 16:21 ` [PATCH bpf-next 1/4] " Leon Hwang
@ 2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 9:42 ` Leon Hwang
2025-02-08 0:23 ` Alexei Starovoitov
1 sibling, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2025-02-06 0:09 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> This patch introduces global percpu data, inspired by commit
> 6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
> definition of global percpu variables in BPF, similar to the
> DEFINE_PER_CPU() macro in the kernel[0].
>
> For example, in BPF, a global percpu variable can be defined like
> this:
>
> int percpu_data SEC(".percpu");
>
> With this patch, tools like retsnoop[1] and bpflbr[2] can simplify their
> BPF code for handling LBRs. The code can be updated from
>
> static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");
>
> to
>
> static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".percpu.lbrs");
>
> This eliminates the need to retrieve the CPU ID using the
> bpf_get_smp_processor_id() helper.
>
> Additionally, by reusing the global percpu data map, sharing information
> between tail callers and callees, or between freplace callers and callees,
> becomes simpler than with explicitly defined percpu_array maps.
>
> Links:
> [0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
> [1] https://github.com/anakryiko/retsnoop
> [2] https://github.com/Asphaltt/bpflbr
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> kernel/bpf/arraymap.c | 39 ++++++++++++++++++++++++++++++++++++-
> kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 83 insertions(+), 1 deletion(-)
>
[...]
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 9971c03adfd5d..9d99497c2b94c 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6810,6 +6810,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
> u64 addr;
> int err;
>
> + if (map->map_type != BPF_MAP_TYPE_ARRAY)
> + return -EINVAL;
> err = map->ops->map_direct_value_addr(map, &addr, off);
> if (err)
> return err;
> @@ -7322,6 +7324,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> /* if map is read-only, track its contents as scalars */
> if (tnum_is_const(reg->var_off) &&
> bpf_map_is_rdonly(map) &&
> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
shouldn't this rather be a safer `map->map_type == BPF_MAP_TYPE_ARRAY` check?
> map->ops->map_direct_value_addr) {
> int map_off = off + reg->var_off.value;
> u64 val = 0;
[...]
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: Introduce global percpu data
2025-02-06 0:09 ` Andrii Nakryiko
@ 2025-02-07 9:42 ` Leon Hwang
0 siblings, 0 replies; 20+ messages in thread
From: Leon Hwang @ 2025-02-07 9:42 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 6/2/25 08:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> This patch introduces global percpu data, inspired by commit
>> 6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
>> definition of global percpu variables in BPF, similar to the
>> DEFINE_PER_CPU() macro in the kernel[0].
>>
>> For example, in BPF, a global percpu variable can be defined like
>> this:
>>
>> int percpu_data SEC(".percpu");
>>
>> With this patch, tools like retsnoop[1] and bpflbr[2] can simplify their
>> BPF code for handling LBRs. The code can be updated from
>>
>> static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");
>>
>> to
>>
>> static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".percpu.lbrs");
>>
>> This eliminates the need to retrieve the CPU ID using the
>> bpf_get_smp_processor_id() helper.
>>
>> Additionally, by reusing the global percpu data map, sharing information
>> between tail callers and callees, or between freplace callers and callees,
>> becomes simpler than with explicitly defined percpu_array maps.
>>
>> Links:
>> [0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
>> [1] https://github.com/anakryiko/retsnoop
>> [2] https://github.com/Asphaltt/bpflbr
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>> kernel/bpf/arraymap.c | 39 ++++++++++++++++++++++++++++++++++++-
>> kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 83 insertions(+), 1 deletion(-)
>>
>
> [...]
>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 9971c03adfd5d..9d99497c2b94c 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -6810,6 +6810,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
>> u64 addr;
>> int err;
>>
>> + if (map->map_type != BPF_MAP_TYPE_ARRAY)
>> + return -EINVAL;
>> err = map->ops->map_direct_value_addr(map, &addr, off);
>> if (err)
>> return err;
>> @@ -7322,6 +7324,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>> /* if map is read-only, track its contents as scalars */
>> if (tnum_is_const(reg->var_off) &&
>> bpf_map_is_rdonly(map) &&
>> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
>
> shouldn't this rather be a safer `map->map_type == BPF_MAP_TYPE_ARRAY` check?
>
Ack. I will update it to `map->map_type == BPF_MAP_TYPE_ARRAY`.
Thanks,
Leon
>> map->ops->map_direct_value_addr) {
>> int map_off = off + reg->var_off.value;
>> u64 val = 0;
>
> [...]
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: Introduce global percpu data
2025-01-27 16:21 ` [PATCH bpf-next 1/4] " Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
@ 2025-02-08 0:23 ` Alexei Starovoitov
2025-02-10 9:35 ` Leon Hwang
1 sibling, 1 reply; 20+ messages in thread
From: Alexei Starovoitov @ 2025-02-08 0:23 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Yonghong Song, Song Liu, Eddy Z, Quentin Monnet, Daniel Xu,
kernel-patches-bot
On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> +
> +static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
> + u64 imm, u32 *off)
> +{
> + struct bpf_array *array = container_of(map, struct bpf_array, map);
> + u64 base = (u64) array->pptrs[0];
> + u64 range = array->elem_size;
> +
> + if (map->max_entries != 1)
> + return -EOPNOTSUPP;
> + if (imm < base || imm >= base + range)
> + return -ENOENT;
> + if (!bpf_jit_supports_percpu_insn())
> + return -EOPNOTSUPP;
> +
> + *off = imm - base;
> + return 0;
> +}
Pls add a selftest for off != 0.
I think the above should work, but this is not obvious.
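For instance, a selftest along these lines (entirely hypothetical, just to
show what exercising off != 0 could look like) would make the verifier see a
BPF_PSEUDO_MAP_VALUE ld_imm64 with a non-zero offset into the map value:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__u64 first SEC(".percpu");	/* offset 0 in the .percpu datasec */
__u64 second SEC(".percpu");	/* placed at a non-zero offset */

SEC("tracepoint/syscalls/sys_enter_getpid")
int bump(void *ctx)
{
	first++;
	second++;	/* direct value access with off != 0 */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";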
>
> +#ifdef CONFIG_SMP
> + if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
> + (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
> + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
Is there a selftest for BPF_PSEUDO_MAP_IDX_VALUE part ?
I couldn't find it.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: Introduce global percpu data
2025-02-08 0:23 ` Alexei Starovoitov
@ 2025-02-10 9:35 ` Leon Hwang
0 siblings, 0 replies; 20+ messages in thread
From: Leon Hwang @ 2025-02-10 9:35 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Yonghong Song, Song Liu, Eddy Z, Quentin Monnet, Daniel Xu,
kernel-patches-bot
On 8/2/25 08:23, Alexei Starovoitov wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> +
>> +static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
>> + u64 imm, u32 *off)
>> +{
>> + struct bpf_array *array = container_of(map, struct bpf_array, map);
>> + u64 base = (u64) array->pptrs[0];
>> + u64 range = array->elem_size;
>> +
>> + if (map->max_entries != 1)
>> + return -EOPNOTSUPP;
>> + if (imm < base || imm >= base + range)
>> + return -ENOENT;
>> + if (!bpf_jit_supports_percpu_insn())
>> + return -EOPNOTSUPP;
>> +
>> + *off = imm - base;
>> + return 0;
>> +}
>
> Pls add a selftest for off != 0.
> I think the above should work, but this is not obvious.
>
Ack.
>>
>> +#ifdef CONFIG_SMP
>> + if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
>> + (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
>> + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
>
> Is there a selftest for BPF_PSEUDO_MAP_IDX_VALUE part ?
> I couldn't find it.
Do you mean adding a selftest built with 'bpftool gen skeleton -L'?
Indeed, it's better to test it.
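If so, the user-space part could look roughly like this (a sketch only,
assuming a hypothetical object name and the usual light-skeleton helpers):

/* "bpftool gen skeleton -L" produces the .lskel.h header; the light
 * skeleton loads the object through the gen_loader, so global data
 * references are emitted as BPF_PSEUDO_MAP_IDX_VALUE.
 */
#include "test_global_percpu_data.lskel.h"

static int run_lskel_test(void)
{
	struct test_global_percpu_data *skel;
	int err;

	skel = test_global_percpu_data__open_and_load();
	if (!skel)
		return -1;

	err = test_global_percpu_data__attach(skel);
	/* ... trigger the program, then dump the percpu map contents ... */

	test_global_percpu_data__destroy(skel);
	return err;
}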
Thanks,
Leon
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf-next 2/4] bpf, libbpf: Support global percpu data
2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 1/4] " Leon Hwang
@ 2025-01-27 16:21 ` Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
2025-01-27 16:21 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 4/4] selftests/bpf: Add a case to test " Leon Hwang
3 siblings, 1 reply; 20+ messages in thread
From: Leon Hwang @ 2025-01-27 16:21 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
This patch introduces support for global percpu data in libbpf. A new
section named ".percpu" is added, similar to the existing ".data" section.
Internal maps are created for ".percpu" sections, which are then
initialized and populated accordingly.
The changes include:
* Introduction of the ".percpu" section in libbpf.
* Creation of internal maps for percpu data.
* Initialization and population of these maps.
This enhancement allows BPF programs to efficiently manage and access
percpu global data, improving performance for use cases that require
per-CPU buffers.
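As an illustration (not part of this patch), user space could then read
every CPU's copy of such a variable through the auto-created ".percpu" map
roughly like this, assuming the map holds a single int:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <bpf/libbpf.h>

static int dump_percpu_var(struct bpf_map *percpu_map)
{
	int num_cpus = libbpf_num_possible_cpus();
	size_t elem_sz = 8;	/* roundup(sizeof(int), 8), one slot per CPU */
	__u32 key = 0;
	void *values;
	int err, cpu;

	if (num_cpus < 0)
		return num_cpus;

	values = calloc(num_cpus, elem_sz);
	if (!values)
		return -ENOMEM;

	err = bpf_map__lookup_elem(percpu_map, &key, sizeof(key),
				   values, elem_sz * num_cpus, 0);
	if (!err)
		for (cpu = 0; cpu < num_cpus; cpu++)
			printf("cpu%d: %d\n", cpu,
			       *(int *)(values + cpu * elem_sz));

	free(values);
	return err;
}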
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/lib/bpf/libbpf.c | 172 ++++++++++++++++++++++++++++++++---------
1 file changed, 135 insertions(+), 37 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da51725..6da6004c5c84d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -516,6 +516,7 @@ struct bpf_struct_ops {
};
#define DATA_SEC ".data"
+#define PERCPU_DATA_SEC ".percpu"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
@@ -530,6 +531,7 @@ enum libbpf_map_type {
LIBBPF_MAP_BSS,
LIBBPF_MAP_RODATA,
LIBBPF_MAP_KCONFIG,
+ LIBBPF_MAP_PERCPU_DATA,
};
struct bpf_map_def {
@@ -562,6 +564,7 @@ struct bpf_map {
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
enum libbpf_map_type libbpf_type;
+ void *data;
void *mmaped;
struct bpf_struct_ops *st_ops;
struct bpf_map *inner_map;
@@ -640,6 +643,7 @@ enum sec_type {
SEC_DATA,
SEC_RODATA,
SEC_ST_OPS,
+ SEC_PERCPU_DATA,
};
struct elf_sec_desc {
@@ -1923,13 +1927,24 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
return false;
}
+static void map_copy_data(struct bpf_map *map, const void *data)
+{
+ bool is_percpu_data = map->libbpf_type == LIBBPF_MAP_PERCPU_DATA;
+ size_t data_sz = map->def.value_size;
+
+ if (data)
+ memcpy(is_percpu_data ? map->data : map->mmaped, data, data_sz);
+}
+
static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
const char *real_name, int sec_idx, void *data, size_t data_sz)
{
+ bool is_percpu_data = type == LIBBPF_MAP_PERCPU_DATA;
struct bpf_map_def *def;
struct bpf_map *map;
size_t mmap_sz;
+ size_t elem_sz;
int err;
map = bpf_object__add_map(obj);
@@ -1948,7 +1963,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
}
def = &map->def;
- def->type = BPF_MAP_TYPE_ARRAY;
+ def->type = is_percpu_data ? BPF_MAP_TYPE_PERCPU_ARRAY
+ : BPF_MAP_TYPE_ARRAY;
def->key_size = sizeof(int);
def->value_size = data_sz;
def->max_entries = 1;
@@ -1958,29 +1974,53 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
/* failures are fine because of maps like .rodata.str1.1 */
(void) map_fill_btf_type_info(obj, map);
- if (map_is_mmapable(obj, map))
- def->map_flags |= BPF_F_MMAPABLE;
+ pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
+ map->name, is_percpu_data ? "percpu " : "", map->sec_idx,
+ map->sec_offset, def->map_flags);
- pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
- map->name, map->sec_idx, map->sec_offset, def->map_flags);
+ if (is_percpu_data) {
+ elem_sz = roundup(data_sz, 8);
- mmap_sz = bpf_map_mmap_sz(map);
- map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- if (map->mmaped == MAP_FAILED) {
- err = -errno;
- map->mmaped = NULL;
- pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err));
- zfree(&map->real_name);
- zfree(&map->name);
- return err;
- }
+ map->data = malloc(elem_sz);
+ if (!map->data) {
+ err = -ENOMEM;
+ pr_warn("map '%s': failed to alloc content buffer: %s\n",
+ map->name, errstr(err));
+ goto free_name;
+ }
- if (data)
- memcpy(map->mmaped, data, data_sz);
+ if (data) {
+ memcpy(map->data, data, data_sz);
+ if (data_sz != elem_sz)
+ memset(map->data + data_sz, 0, elem_sz - data_sz);
+ } else {
+ memset(map->data, 0, elem_sz);
+ }
+ } else {
+ if (map_is_mmapable(obj, map))
+ def->map_flags |= BPF_F_MMAPABLE;
+
+ mmap_sz = bpf_map_mmap_sz(map);
+ map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("map '%s': failed to alloc content buffer: %s\n",
+ map->name, errstr(err));
+ goto free_name;
+ }
+
+ map_copy_data(map, data);
+ }
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
return 0;
+
+free_name:
+ zfree(&map->real_name);
+ zfree(&map->name);
+ return err;
}
static int bpf_object__init_global_data_maps(struct bpf_object *obj)
@@ -2015,6 +2055,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
sec_desc->data->d_buf,
sec_desc->data->d_size);
break;
+ case SEC_PERCPU_DATA:
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU_DATA,
+ sec_name, sec_idx,
+ sec_desc->data->d_buf,
+ sec_desc->data->d_size);
+ break;
case SEC_BSS:
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
@@ -3934,6 +3981,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RODATA;
sec_desc->shdr = sh;
sec_desc->data = data;
+ } else if (strcmp(name, PERCPU_DATA_SEC) == 0 ||
+ str_has_pfx(name, PERCPU_DATA_SEC)) {
+ sec_desc->sec_type = SEC_PERCPU_DATA;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
@@ -4453,6 +4505,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
case SEC_BSS:
case SEC_DATA:
case SEC_RODATA:
+ case SEC_PERCPU_DATA:
return true;
default:
return false;
@@ -4478,6 +4531,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
return LIBBPF_MAP_DATA;
case SEC_RODATA:
return LIBBPF_MAP_RODATA;
+ case SEC_PERCPU_DATA:
+ return LIBBPF_MAP_PERCPU_DATA;
default:
return LIBBPF_MAP_UNSPEC;
}
@@ -4795,7 +4850,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
/*
* LLVM annotates global data differently in BTF, that is,
- * only as '.data', '.bss' or '.rodata'.
+ * only as '.data', '.bss', '.percpu' or '.rodata'.
*/
if (!bpf_map__is_internal(map))
return -ENOENT;
@@ -5125,23 +5180,54 @@ static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
enum libbpf_map_type map_type = map->libbpf_type;
- int err, zero = 0;
+ bool is_percpu_data = map_type == LIBBPF_MAP_PERCPU_DATA;
+ int err = 0, zero = 0;
+ void *data = NULL;
+ int num_cpus, i;
+ size_t data_sz;
+ size_t elem_sz;
size_t mmap_sz;
+ data_sz = map->def.value_size;
+ if (is_percpu_data) {
+ num_cpus = libbpf_num_possible_cpus();
+ if (num_cpus < 0) {
+ err = libbpf_err_errno(num_cpus);
+ pr_warn("map '%s': failed to get num_cpus: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
+
+ elem_sz = roundup(data_sz, 8);
+ data_sz = elem_sz * num_cpus;
+ data = malloc(data_sz);
+ if (!data) {
+ err = -ENOMEM;
+ pr_warn("map '%s': failed to malloc memory: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
+
+ for (i = 0; i < num_cpus; i++)
+ memcpy(data + i * elem_sz, map->data, elem_sz);
+ } else {
+ data = map->mmaped;
+ }
+
if (obj->gen_loader) {
bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
- map->mmaped, map->def.value_size);
+ data, data_sz);
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
- return 0;
+ goto free_data;
}
- err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+ err = bpf_map_update_elem(map->fd, &zero, data, 0);
if (err) {
err = -errno;
pr_warn("map '%s': failed to set initial contents: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ goto free_data;
}
/* Freeze .rodata and .kconfig map as read-only from syscall side. */
@@ -5151,7 +5237,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
err = -errno;
pr_warn("map '%s': failed to freeze as read-only: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ goto free_data;
}
}
@@ -5178,7 +5264,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
err = -errno;
pr_warn("map '%s': failed to re-mmap() contents: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ goto free_data;
}
map->mmaped = mmaped;
} else if (map->mmaped) {
@@ -5186,7 +5272,10 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
map->mmaped = NULL;
}
- return 0;
+free_data:
+ if (is_percpu_data)
+ free(data);
+ return err;
}
static void bpf_map__destroy(struct bpf_map *map);
@@ -8120,7 +8209,9 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
struct bpf_map *m;
bpf_object__for_each_map(m, obj) {
- if (!bpf_map__is_internal(m))
+ if (!bpf_map__is_internal(m) ||
+ /* percpu data map is internal and not-mmapable. */
+ m->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
continue;
if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
m->def.map_flags &= ~BPF_F_MMAPABLE;
@@ -9041,6 +9132,8 @@ static void bpf_map__destroy(struct bpf_map *map)
if (map->mmaped && map->mmaped != map->obj->arena_data)
munmap(map->mmaped, bpf_map_mmap_sz(map));
map->mmaped = NULL;
+ if (map->data)
+ zfree(&map->data);
if (map->st_ops) {
zfree(&map->st_ops->data);
@@ -10132,14 +10225,18 @@ int bpf_map__fd(const struct bpf_map *map)
static bool map_uses_real_name(const struct bpf_map *map)
{
- /* Since libbpf started to support custom .data.* and .rodata.* maps,
- * their user-visible name differs from kernel-visible name. Users see
- * such map's corresponding ELF section name as a map name.
- * This check distinguishes .data/.rodata from .data.* and .rodata.*
- * maps to know which name has to be returned to the user.
+ /* Since libbpf started to support custom .data.*, .percpu.* and
+ * .rodata.* maps, their user-visible name differs from kernel-visible
+ * name. Users see such map's corresponding ELF section name as a map
+ * name. This check distinguishes .data/.percpu/.rodata from .data.*,
+ * .percpu.* and .rodata.* maps to know which name has to be returned to
+ * the user.
*/
if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
return true;
+ if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA &&
+ strcmp(map->real_name, PERCPU_DATA_SEC) != 0)
+ return true;
if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
return true;
return false;
@@ -10348,7 +10445,8 @@ int bpf_map__set_initial_value(struct bpf_map *map,
if (map->obj->loaded || map->reused)
return libbpf_err(-EBUSY);
- if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+ if ((!map->mmaped && !map->data) ||
+ map->libbpf_type == LIBBPF_MAP_KCONFIG)
return libbpf_err(-EINVAL);
if (map->def.type == BPF_MAP_TYPE_ARENA)
@@ -10358,7 +10456,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
if (size != actual_sz)
return libbpf_err(-EINVAL);
- memcpy(map->mmaped, data, size);
+ map_copy_data(map, data);
return 0;
}
@@ -10370,7 +10468,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
return map->st_ops->data;
}
- if (!map->mmaped)
+ if (!map->mmaped && !map->data)
return NULL;
if (map->def.type == BPF_MAP_TYPE_ARENA)
@@ -10378,7 +10476,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
else
*psize = map->def.value_size;
- return map->mmaped;
+ return map->libbpf_type == LIBBPF_MAP_PERCPU_DATA ? map->data : map->mmaped;
}
bool bpf_map__is_internal(const struct bpf_map *map)
--
2.47.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 2/4] bpf, libbpf: Support global percpu data
2025-01-27 16:21 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
@ 2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 9:48 ` Leon Hwang
0 siblings, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2025-02-06 0:09 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> This patch introduces support for global percpu data in libbpf. A new
> section named ".percpu" is added, similar to the existing ".data" section.
> Internal maps are created for ".percpu" sections, which are then
> initialized and populated accordingly.
>
> The changes include:
>
> * Introduction of the ".percpu" section in libbpf.
> * Creation of internal maps for percpu data.
> * Initialization and population of these maps.
>
> This enhancement allows BPF programs to efficiently manage and access
> percpu global data, improving performance for use cases that require
> per-CPU buffers.
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> tools/lib/bpf/libbpf.c | 172 ++++++++++++++++++++++++++++++++---------
> 1 file changed, 135 insertions(+), 37 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 194809da51725..6da6004c5c84d 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -516,6 +516,7 @@ struct bpf_struct_ops {
> };
>
> #define DATA_SEC ".data"
> +#define PERCPU_DATA_SEC ".percpu"
> #define BSS_SEC ".bss"
> #define RODATA_SEC ".rodata"
> #define KCONFIG_SEC ".kconfig"
> @@ -530,6 +531,7 @@ enum libbpf_map_type {
> LIBBPF_MAP_BSS,
> LIBBPF_MAP_RODATA,
> LIBBPF_MAP_KCONFIG,
> + LIBBPF_MAP_PERCPU_DATA,
nit: let's keep it shorter: LIBBPF_MAP_PERCPU
> };
>
> struct bpf_map_def {
> @@ -562,6 +564,7 @@ struct bpf_map {
> __u32 btf_value_type_id;
> __u32 btf_vmlinux_value_type_id;
> enum libbpf_map_type libbpf_type;
> + void *data;
> void *mmaped;
> struct bpf_struct_ops *st_ops;
> struct bpf_map *inner_map;
> @@ -640,6 +643,7 @@ enum sec_type {
> SEC_DATA,
> SEC_RODATA,
> SEC_ST_OPS,
> + SEC_PERCPU_DATA,
ditto, just SEC_PERCPU?
> };
>
> struct elf_sec_desc {
> @@ -1923,13 +1927,24 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
> return false;
> }
>
> +static void map_copy_data(struct bpf_map *map, const void *data)
> +{
> + bool is_percpu_data = map->libbpf_type == LIBBPF_MAP_PERCPU_DATA;
> + size_t data_sz = map->def.value_size;
> +
> + if (data)
> + memcpy(is_percpu_data ? map->data : map->mmaped, data, data_sz);
> +}
> +
> static int
> bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
> const char *real_name, int sec_idx, void *data, size_t data_sz)
> {
> + bool is_percpu_data = type == LIBBPF_MAP_PERCPU_DATA;
> struct bpf_map_def *def;
> struct bpf_map *map;
> size_t mmap_sz;
> + size_t elem_sz;
nit: just:
size_t mmap_sz, elem_sz;
> int err;
>
> map = bpf_object__add_map(obj);
> @@ -1948,7 +1963,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
> }
>
> def = &map->def;
> - def->type = BPF_MAP_TYPE_ARRAY;
> + def->type = is_percpu_data ? BPF_MAP_TYPE_PERCPU_ARRAY
> + : BPF_MAP_TYPE_ARRAY;
nit: single line
> def->key_size = sizeof(int);
> def->value_size = data_sz;
> def->max_entries = 1;
[...]
> + if (map_is_mmapable(obj, map))
> + def->map_flags |= BPF_F_MMAPABLE;
> +
> + mmap_sz = bpf_map_mmap_sz(map);
> + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
> + MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> + if (map->mmaped == MAP_FAILED) {
> + err = -errno;
> + map->mmaped = NULL;
> + pr_warn("map '%s': failed to alloc content buffer: %s\n",
> + map->name, errstr(err));
> + goto free_name;
> + }
> +
> + map_copy_data(map, data);
why not memcpy() here? you know it's not a percpu map, so why obscure
that memcpy?
> + }
>
> pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
> return 0;
[...]
> @@ -5125,23 +5180,54 @@ static int
> bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
> {
> enum libbpf_map_type map_type = map->libbpf_type;
> - int err, zero = 0;
> + bool is_percpu_data = map_type == LIBBPF_MAP_PERCPU_DATA;
> + int err = 0, zero = 0;
> + void *data = NULL;
> + int num_cpus, i;
> + size_t data_sz;
> + size_t elem_sz;
> size_t mmap_sz;
nit: keep those size_t variables grouped: `size_t mmap_sz, data_sz, elem_sz;`
>
> + data_sz = map->def.value_size;
> + if (is_percpu_data) {
> + num_cpus = libbpf_num_possible_cpus();
> + if (num_cpus < 0) {
> + err = libbpf_err_errno(num_cpus);
why? num_cpus *IS* error code if num_cpus < 0
> + pr_warn("map '%s': failed to get num_cpus: %s\n",
> + bpf_map__name(map), errstr(err));
this is unlikely to happen, I'd drop pr_warn()
> + return err;
> + }
> +
> + elem_sz = roundup(data_sz, 8);
> + data_sz = elem_sz * num_cpus;
> + data = malloc(data_sz);
> + if (!data) {
> + err = -ENOMEM;
> + pr_warn("map '%s': failed to malloc memory: %s\n",
> + bpf_map__name(map), errstr(err));
-ENOMEM is rather self-descriptive (and generally not expected), so
don't add pr_warn() for such cases
> + return err;
> + }
> +
> + for (i = 0; i < num_cpus; i++)
> + memcpy(data + i * elem_sz, map->data, elem_sz);
> + } else {
> + data = map->mmaped;
> + }
> +
[...]
> static void bpf_map__destroy(struct bpf_map *map);
> @@ -8120,7 +8209,9 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
> struct bpf_map *m;
>
> bpf_object__for_each_map(m, obj) {
> - if (!bpf_map__is_internal(m))
> + if (!bpf_map__is_internal(m) ||
> + /* percpu data map is internal and not-mmapable. */
> + m->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
original logic would work anyways, no? let's not add unnecessary
special casing here
> continue;
> if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
> m->def.map_flags &= ~BPF_F_MMAPABLE;
> @@ -9041,6 +9132,8 @@ static void bpf_map__destroy(struct bpf_map *map)
> if (map->mmaped && map->mmaped != map->obj->arena_data)
> munmap(map->mmaped, bpf_map_mmap_sz(map));
> map->mmaped = NULL;
> + if (map->data)
> + zfree(&map->data);
>
this whole map->mmaped and map->data duality and duplication seems not
worth it, tbh. Maybe we should keep using map->mmaped (we probably
could name it more generically at some point, but I don't want to
start bike shedding now) even for malloc'ed memory? After all, we
already have ARENA as another special case? WDYT, can your changes be
implemented by reusing map->mmaped, taking into account a type of map?
pw-bot: cr
> if (map->st_ops) {
> zfree(&map->st_ops->data);
> @@ -10132,14 +10225,18 @@ int bpf_map__fd(const struct bpf_map *map)
>
> static bool map_uses_real_name(const struct bpf_map *map)
> {
> - /* Since libbpf started to support custom .data.* and .rodata.* maps,
> - * their user-visible name differs from kernel-visible name. Users see
> - * such map's corresponding ELF section name as a map name.
> - * This check distinguishes .data/.rodata from .data.* and .rodata.*
> - * maps to know which name has to be returned to the user.
> + /* Since libbpf started to support custom .data.*, .percpu.* and
> + * .rodata.* maps, their user-visible name differs from kernel-visible
> + * name. Users see such map's corresponding ELF section name as a map
> + * name. This check distinguishes .data/.percpu/.rodata from .data.*,
> + * .percpu.* and .rodata.* maps to know which name has to be returned to
> + * the user.
> */
> if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
> return true;
> + if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA &&
> + strcmp(map->real_name, PERCPU_DATA_SEC) != 0)
> + return true;
nit: shorten LIBBPF_MAP_PERCPU_DATA and keep single line, please
> if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
> return true;
> return false;
> @@ -10348,7 +10445,8 @@ int bpf_map__set_initial_value(struct bpf_map *map,
> if (map->obj->loaded || map->reused)
> return libbpf_err(-EBUSY);
>
> - if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
> + if ((!map->mmaped && !map->data) ||
> + map->libbpf_type == LIBBPF_MAP_KCONFIG)
> return libbpf_err(-EINVAL);
>
> if (map->def.type == BPF_MAP_TYPE_ARENA)
> @@ -10358,7 +10456,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
> if (size != actual_sz)
> return libbpf_err(-EINVAL);
>
> - memcpy(map->mmaped, data, size);
> + map_copy_data(map, data);
> return 0;
> }
>
> @@ -10370,7 +10468,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
> return map->st_ops->data;
> }
>
> - if (!map->mmaped)
> + if (!map->mmaped && !map->data)
> return NULL;
>
> if (map->def.type == BPF_MAP_TYPE_ARENA)
> @@ -10378,7 +10476,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
> else
> *psize = map->def.value_size;
>
> - return map->mmaped;
> + return map->libbpf_type == LIBBPF_MAP_PERCPU_DATA ? map->data : map->mmaped;
Good chunk of changes like this wouldn't be necessary if we just reuse
map->mmaped.
> }
>
> bool bpf_map__is_internal(const struct bpf_map *map)
> --
> 2.47.1
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 2/4] bpf, libbpf: Support global percpu data
2025-02-06 0:09 ` Andrii Nakryiko
@ 2025-02-07 9:48 ` Leon Hwang
0 siblings, 0 replies; 20+ messages in thread
From: Leon Hwang @ 2025-02-07 9:48 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 6/2/25 08:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> This patch introduces support for global percpu data in libbpf. A new
>> section named ".percpu" is added, similar to the existing ".data" section.
>> Internal maps are created for ".percpu" sections, which are then
>> initialized and populated accordingly.
>>
>> The changes include:
>>
>> * Introduction of the ".percpu" section in libbpf.
>> * Creation of internal maps for percpu data.
>> * Initialization and population of these maps.
>>
>> This enhancement allows BPF programs to efficiently manage and access
>> percpu global data, improving performance for use cases that require
>> per-CPU buffers.
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>> tools/lib/bpf/libbpf.c | 172 ++++++++++++++++++++++++++++++++---------
>> 1 file changed, 135 insertions(+), 37 deletions(-)
>>
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index 194809da51725..6da6004c5c84d 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -516,6 +516,7 @@ struct bpf_struct_ops {
>> };
>>
>> #define DATA_SEC ".data"
>> +#define PERCPU_DATA_SEC ".percpu"
>> #define BSS_SEC ".bss"
>> #define RODATA_SEC ".rodata"
>> #define KCONFIG_SEC ".kconfig"
>> @@ -530,6 +531,7 @@ enum libbpf_map_type {
>> LIBBPF_MAP_BSS,
>> LIBBPF_MAP_RODATA,
>> LIBBPF_MAP_KCONFIG,
>> + LIBBPF_MAP_PERCPU_DATA,
>
> nit: let's keep it shorter: LIBBPF_MAP_PERCPU
>
Ack.
>> };
>>
>> struct bpf_map_def {
>> @@ -562,6 +564,7 @@ struct bpf_map {
>> __u32 btf_value_type_id;
>> __u32 btf_vmlinux_value_type_id;
>> enum libbpf_map_type libbpf_type;
>> + void *data;
>> void *mmaped;
>> struct bpf_struct_ops *st_ops;
>> struct bpf_map *inner_map;
>> @@ -640,6 +643,7 @@ enum sec_type {
>> SEC_DATA,
>> SEC_RODATA,
>> SEC_ST_OPS,
>> + SEC_PERCPU_DATA,
>
> ditto, just SEC_PERCPU?
>
Ack.
>> };
>>
>> struct elf_sec_desc {
>> @@ -1923,13 +1927,24 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
>> return false;
>> }
>>
>> +static void map_copy_data(struct bpf_map *map, const void *data)
>> +{
>> + bool is_percpu_data = map->libbpf_type == LIBBPF_MAP_PERCPU_DATA;
>> + size_t data_sz = map->def.value_size;
>> +
>> + if (data)
>> + memcpy(is_percpu_data ? map->data : map->mmaped, data, data_sz);
>> +}
>> +
>> static int
>> bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>> const char *real_name, int sec_idx, void *data, size_t data_sz)
>> {
>> + bool is_percpu_data = type == LIBBPF_MAP_PERCPU_DATA;
>> struct bpf_map_def *def;
>> struct bpf_map *map;
>> size_t mmap_sz;
>> + size_t elem_sz;
>
> nit: just:
>
> size_t mmap_sz, elem_sz;
>
>> int err;
>>
>> map = bpf_object__add_map(obj);
>> @@ -1948,7 +1963,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>> }
>>
>> def = &map->def;
>> - def->type = BPF_MAP_TYPE_ARRAY;
>> + def->type = is_percpu_data ? BPF_MAP_TYPE_PERCPU_ARRAY
>> + : BPF_MAP_TYPE_ARRAY;
>
> nit: single line
>
>> def->key_size = sizeof(int);
>> def->value_size = data_sz;
>> def->max_entries = 1;
>
> [...]
>
>> + if (map_is_mmapable(obj, map))
>> + def->map_flags |= BPF_F_MMAPABLE;
>> +
>> + mmap_sz = bpf_map_mmap_sz(map);
>> + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
>> + MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>> + if (map->mmaped == MAP_FAILED) {
>> + err = -errno;
>> + map->mmaped = NULL;
>> + pr_warn("map '%s': failed to alloc content buffer: %s\n",
>> + map->name, errstr(err));
>> + goto free_name;
>> + }
>> +
>> + map_copy_data(map, data);
>
> why not memcpy() here? you know it's not a percpu map, so why obscure
> that memcpy?
>
>
>> + }
>>
>> pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
>> return 0;
>
> [...]
>
>> @@ -5125,23 +5180,54 @@ static int
>> bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
>> {
>> enum libbpf_map_type map_type = map->libbpf_type;
>> - int err, zero = 0;
>> + bool is_percpu_data = map_type == LIBBPF_MAP_PERCPU_DATA;
>> + int err = 0, zero = 0;
>> + void *data = NULL;
>> + int num_cpus, i;
>> + size_t data_sz;
>> + size_t elem_sz;
>> size_t mmap_sz;
>
> nit: keep those size_t variables grouped: `size_t mmap_sz, data_sz, elem_sz;`
>
Ack.
>>
>> + data_sz = map->def.value_size;
>> + if (is_percpu_data) {
>> + num_cpus = libbpf_num_possible_cpus();
>> + if (num_cpus < 0) {
>> + err = libbpf_err_errno(num_cpus);
>
> why? num_cpus *IS* error code if num_cpus < 0
>
Ack. Thanks for pointing this out.
>> + pr_warn("map '%s': failed to get num_cpus: %s\n",
>> + bpf_map__name(map), errstr(err));
>
> this is unlikely to happen, I'd drop pr_warn()
>
Ack.
>> + return err;
>> + }
>> +
>> + elem_sz = roundup(data_sz, 8);
>> + data_sz = elem_sz * num_cpus;
>> + data = malloc(data_sz);
>> + if (!data) {
>> + err = -ENOMEM;
>> + pr_warn("map '%s': failed to malloc memory: %s\n",
>> + bpf_map__name(map), errstr(err));
>
> -ENOMEM is rather self-descriptive (and generally not expected), so
> don't add pr_warn() for such cases
>
Ack.
>> + return err;
>> + }
>> +
>> + for (i = 0; i < num_cpus; i++)
>> + memcpy(data + i * elem_sz, map->data, elem_sz);
>> + } else {
>> + data = map->mmaped;
>> + }
>> +
>
> [...]
>
>> static void bpf_map__destroy(struct bpf_map *map);
>> @@ -8120,7 +8209,9 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
>> struct bpf_map *m;
>>
>> bpf_object__for_each_map(m, obj) {
>> - if (!bpf_map__is_internal(m))
>> + if (!bpf_map__is_internal(m) ||
>> + /* percpu data map is internal and not-mmapable. */
>> + m->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
>
> original logic would work anyways, no? let's not add unnecessary
> special casing here
>
The original logic works for it. I'll remove it.
>> continue;
>> if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
>> m->def.map_flags &= ~BPF_F_MMAPABLE;
>> @@ -9041,6 +9132,8 @@ static void bpf_map__destroy(struct bpf_map *map)
>> if (map->mmaped && map->mmaped != map->obj->arena_data)
>> munmap(map->mmaped, bpf_map_mmap_sz(map));
>> map->mmaped = NULL;
>> + if (map->data)
>> + zfree(&map->data);
>>
>
> this whole map->mmaped and map->data duality and duplication seems not
> worth it, tbh. Maybe we should keep using map->mmaped (we probably
> could name it more generically at some point, but I don't want to
> start bike shedding now) even for malloc'ed memory? After all, we
> already have ARENA as another special case? WDYT, can your changes be
> implemented by reusing map->mmaped, taking into account a type of map?
>
It is better to reuse map->mmaped. I'll give it a try.
> pw-bot: cr
>
>> if (map->st_ops) {
>> zfree(&map->st_ops->data);
>> @@ -10132,14 +10225,18 @@ int bpf_map__fd(const struct bpf_map *map)
>>
>> static bool map_uses_real_name(const struct bpf_map *map)
>> {
>> - /* Since libbpf started to support custom .data.* and .rodata.* maps,
>> - * their user-visible name differs from kernel-visible name. Users see
>> - * such map's corresponding ELF section name as a map name.
>> - * This check distinguishes .data/.rodata from .data.* and .rodata.*
>> - * maps to know which name has to be returned to the user.
>> + /* Since libbpf started to support custom .data.*, .percpu.* and
>> + * .rodata.* maps, their user-visible name differs from kernel-visible
>> + * name. Users see such map's corresponding ELF section name as a map
>> + * name. This check distinguishes .data/.percpu/.rodata from .data.*,
>> + * .percpu.* and .rodata.* maps to know which name has to be returned to
>> + * the user.
>> */
>> if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
>> return true;
>> + if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA &&
>> + strcmp(map->real_name, PERCPU_DATA_SEC) != 0)
>> + return true;
>
> nit: shorten LIBBPF_MAP_PERCPU_DATA and keep single line, please
>
Ack.
>> if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
>> return true;
>> return false;
>> @@ -10348,7 +10445,8 @@ int bpf_map__set_initial_value(struct bpf_map *map,
>> if (map->obj->loaded || map->reused)
>> return libbpf_err(-EBUSY);
>>
>> - if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
>> + if ((!map->mmaped && !map->data) ||
>> + map->libbpf_type == LIBBPF_MAP_KCONFIG)
>> return libbpf_err(-EINVAL);
>>
>> if (map->def.type == BPF_MAP_TYPE_ARENA)
>> @@ -10358,7 +10456,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
>> if (size != actual_sz)
>> return libbpf_err(-EINVAL);
>>
>> - memcpy(map->mmaped, data, size);
>> + map_copy_data(map, data);
>> return 0;
>> }
>>
>> @@ -10370,7 +10468,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
>> return map->st_ops->data;
>> }
>>
>> - if (!map->mmaped)
>> + if (!map->mmaped && !map->data)
>> return NULL;
>>
>> if (map->def.type == BPF_MAP_TYPE_ARENA)
>> @@ -10378,7 +10476,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
>> else
>> *psize = map->def.value_size;
>>
>> - return map->mmaped;
>> + return map->libbpf_type == LIBBPF_MAP_PERCPU_DATA ? map->data : map->mmaped;
>
> Good chunk of changes like this wouldn't be necessary if we just reuse
> map->mmaped.
>
Yes, you're right. It's unnecessary to change it if we reuse map->mmaped.
>
>> }
>>
>> bool bpf_map__is_internal(const struct bpf_map *map)
>> --
>> 2.47.1
>>
Thanks,
Leon
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 1/4] " Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
@ 2025-01-27 16:21 ` Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
2025-01-27 16:21 ` [PATCH bpf-next 4/4] selftests/bpf: Add a case to test " Leon Hwang
3 siblings, 1 reply; 20+ messages in thread
From: Leon Hwang @ 2025-01-27 16:21 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
This patch enhances bpftool to generate skeletons for global percpu
variables. The generated skeleton includes a dedicated structure for
percpu data, allowing users to initialize and access percpu variables
efficiently.
Changes:
1. skeleton structure:
For global percpu variables, the skeleton now includes a nested
structure, e.g.:
struct test_global_percpu_data {
struct bpf_object_skeleton *skeleton;
struct bpf_object *obj;
struct {
struct bpf_map *bss;
struct bpf_map *percpu;
} maps;
// ...
struct test_global_percpu_data__percpu {
int percpu_data;
} *percpu;
// ...
};
* The "struct test_global_percpu_data__percpu" points to initialized
data, which is actually "maps.percpu->data".
* Before loading the skeleton, updating the
"struct test_global_percpu_data__percpu" modifies the initial value
of the corresponding global percpu variables.
* After loading the skeleton, accessing or updating this struct is no
longer meaningful. Instead, users must interact with the global
percpu variables via the "maps.percpu" map.
2. code changes:
* Added support for ".percpu" sections in bpftool's map identification
logic.
* Modified skeleton generation to handle percpu data maps
appropriately.
* Updated libbpf to make "percpu" point to "maps.percpu->data".
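Putting the points above together, the intended workflow looks roughly like
this (hypothetical user code; only the names from the example skeleton above
are assumed):

#include "test_global_percpu_data.skel.h"

int run(void)
{
	struct test_global_percpu_data *skel;
	int err;

	skel = test_global_percpu_data__open();
	if (!skel)
		return -1;

	/* before load: seeds the initial value of every CPU's copy */
	skel->percpu->percpu_data = 42;

	err = test_global_percpu_data__load(skel);
	if (err)
		goto out;

	/* after load: skel->percpu is stale; access the variable through
	 * the "maps.percpu" map instead, e.g. via bpf_map__lookup_elem()
	 */
out:
	test_global_percpu_data__destroy(skel);
	return err;
}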
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/bpf/bpftool/gen.c | 13 +++++++++----
tools/lib/bpf/libbpf.c | 3 +++
tools/lib/bpf/libbpf.h | 1 +
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 5a4d3240689ed..975775683ca12 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
{
- static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+ static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
const char *name = bpf_map__name(map);
int i, n;
@@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
{
- static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+ static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
int i, n;
/* recognize hard coded LLVM section name */
@@ -263,7 +263,9 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
return true;
}
- if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
+ if (!bpf_map__is_internal(map) ||
+ (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE) &&
+ bpf_map__type(map) != BPF_MAP_TYPE_PERCPU_ARRAY))
return false;
if (!get_map_ident(map, buf, sz))
@@ -903,7 +905,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
i, bpf_map__name(map), ident);
/* memory-mapped internal maps */
if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
- printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
+ if (bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY)
+ printf("\tmap->data = (void **)&obj->%s;\n", ident);
+ else
+ printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
}
if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6da6004c5c84d..dafb419bc5b86 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -13915,6 +13915,7 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
struct bpf_map **map = map_skel->map;
const char *name = map_skel->name;
void **mmaped = map_skel->mmaped;
+ void **data = map_skel->data;
*map = bpf_object__find_map_by_name(obj, name);
if (!*map) {
@@ -13925,6 +13926,8 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
/* externs shouldn't be pre-setup from user code */
if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
*mmaped = (*map)->mmaped;
+ if (data && (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
+ *data = (*map)->data;
}
return 0;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a0..c49d6e44b5630 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1701,6 +1701,7 @@ struct bpf_map_skeleton {
const char *name;
struct bpf_map **map;
void **mmaped;
+ void **data;
struct bpf_link **link;
};
--
2.47.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
2025-01-27 16:21 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
@ 2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 9:52 ` Leon Hwang
0 siblings, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2025-02-06 0:09 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> This patch enhances bpftool to generate skeletons for global percpu
> variables. The generated skeleton includes a dedicated structure for
> percpu data, allowing users to initialize and access percpu variables
> efficiently.
>
> Changes:
>
> 1. skeleton structure:
>
> For global percpu variables, the skeleton now includes a nested
> structure, e.g.:
>
> struct test_global_percpu_data {
> struct bpf_object_skeleton *skeleton;
> struct bpf_object *obj;
> struct {
> struct bpf_map *bss;
> struct bpf_map *percpu;
> } maps;
> // ...
> struct test_global_percpu_data__percpu {
> int percpu_data;
> } *percpu;
>
> // ...
> };
>
> * The "struct test_global_percpu_data__percpu" points to initialized
> data, which is actually "maps.percpu->data".
> * Before loading the skeleton, updating the
> "struct test_global_percpu_data__percpu" modifies the initial value
> of the corresponding global percpu variables.
> * After loading the skeleton, accessing or updating this struct is no
> longer meaningful. Instead, users must interact with the global
> percpu variables via the "maps.percpu" map.
can we set this pointer to NULL after load to avoid accidental
(successful) reads/writes to it?
>
> 2. code changes:
>
> * Added support for ".percpu" sections in bpftool's map identification
> logic.
> * Modified skeleton generation to handle percpu data maps
> appropriately.
> * Updated libbpf to make "percpu" point to "maps.percpu->data".
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> tools/bpf/bpftool/gen.c | 13 +++++++++----
> tools/lib/bpf/libbpf.c | 3 +++
> tools/lib/bpf/libbpf.h | 1 +
> 3 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> index 5a4d3240689ed..975775683ca12 100644
> --- a/tools/bpf/bpftool/gen.c
> +++ b/tools/bpf/bpftool/gen.c
> @@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
>
> static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
> {
> - static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
> + static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
> const char *name = bpf_map__name(map);
> int i, n;
>
> @@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>
> static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
> {
> - static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
> + static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
> int i, n;
>
> /* recognize hard coded LLVM section name */
> @@ -263,7 +263,9 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
> return true;
> }
>
> - if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
> + if (!bpf_map__is_internal(map) ||
> + (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE) &&
> + bpf_map__type(map) != BPF_MAP_TYPE_PERCPU_ARRAY))
there will be no BPF_F_MMAPABLE set for PERCPU_ARRAY, so why add these
unnecessary extra conditionals?
> return false;
>
> if (!get_map_ident(map, buf, sz))
> @@ -903,7 +905,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
> i, bpf_map__name(map), ident);
> /* memory-mapped internal maps */
> if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
> - printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
> + if (bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY)
> + printf("\tmap->data = (void **)&obj->%s;\n", ident);
> + else
> + printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
> }
>
> if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) {
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 6da6004c5c84d..dafb419bc5b86 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -13915,6 +13915,7 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
> struct bpf_map **map = map_skel->map;
> const char *name = map_skel->name;
> void **mmaped = map_skel->mmaped;
> + void **data = map_skel->data;
>
> *map = bpf_object__find_map_by_name(obj, name);
> if (!*map) {
> @@ -13925,6 +13926,8 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
> /* externs shouldn't be pre-setup from user code */
> if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
> *mmaped = (*map)->mmaped;
> + if (data && (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
> + *data = (*map)->data;
> }
> return 0;
> }
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 3020ee45303a0..c49d6e44b5630 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1701,6 +1701,7 @@ struct bpf_map_skeleton {
> const char *name;
> struct bpf_map **map;
> void **mmaped;
> + void **data;
this is breaking backwards/forward compatibility. let's try to reuse mmaped
> struct bpf_link **link;
> };
>
> --
> 2.47.1
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
2025-02-06 0:09 ` Andrii Nakryiko
@ 2025-02-07 9:52 ` Leon Hwang
0 siblings, 0 replies; 20+ messages in thread
From: Leon Hwang @ 2025-02-07 9:52 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 6/2/25 08:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> This patch enhances bpftool to generate skeletons for global percpu
>> variables. The generated skeleton includes a dedicated structure for
>> percpu data, allowing users to initialize and access percpu variables
>> efficiently.
>>
>> Changes:
>>
>> 1. skeleton structure:
>>
>> For global percpu variables, the skeleton now includes a nested
>> structure, e.g.:
>>
>> struct test_global_percpu_data {
>> struct bpf_object_skeleton *skeleton;
>> struct bpf_object *obj;
>> struct {
>> struct bpf_map *bss;
>> struct bpf_map *percpu;
>> } maps;
>> // ...
>> struct test_global_percpu_data__percpu {
>> int percpu_data;
>> } *percpu;
>>
>> // ...
>> };
>>
>> * The "struct test_global_percpu_data__percpu" points to initialized
>> data, which is actually "maps.percpu->data".
>> * Before loading the skeleton, updating the
>> "struct test_global_percpu_data__percpu" modifies the initial value
>> of the corresponding global percpu variables.
>> * After loading the skeleton, accessing or updating this struct is no
>> longer meaningful. Instead, users must interact with the global
>> percpu variables via the "maps.percpu" map.
>
> can we set this pointer to NULL after load to avoid accidental
> (successful) reads/writes to it?
>
Good idea. I'll try to implement it.
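Something like this in the generated skeleton (rough sketch, untested;
assumes bpftool emits the NULL-ing right after a successful load):

static inline int
test_global_percpu_data__load(struct test_global_percpu_data *obj)
{
	int err;

	err = bpf_object__load_skeleton(obj->skeleton);
	if (err)
		return err;
	/* after load the initial image is no longer backed by the
	 * per-CPU values, so drop the pointer to catch accidental
	 * accesses
	 */
	obj->percpu = NULL;
	return 0;
}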
>>
>> 2. code changes:
>>
>> * Added support for ".percpu" sections in bpftool's map identification
>> logic.
>> * Modified skeleton generation to handle percpu data maps
>> appropriately.
>> * Updated libbpf to make "percpu" pointing to "maps.percpu->data".
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>> tools/bpf/bpftool/gen.c | 13 +++++++++----
>> tools/lib/bpf/libbpf.c | 3 +++
>> tools/lib/bpf/libbpf.h | 1 +
>> 3 files changed, 13 insertions(+), 4 deletions(-)
>>
>> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
>> index 5a4d3240689ed..975775683ca12 100644
>> --- a/tools/bpf/bpftool/gen.c
>> +++ b/tools/bpf/bpftool/gen.c
>> @@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
>>
>> static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>> {
>> - static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
>> + static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
>> const char *name = bpf_map__name(map);
>> int i, n;
>>
>> @@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>>
>> static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
>> {
>> - static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
>> + static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
>> int i, n;
>>
>> /* recognize hard coded LLVM section name */
>> @@ -263,7 +263,9 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
>> return true;
>> }
>>
>> - if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
>> + if (!bpf_map__is_internal(map) ||
>> + (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE) &&
>> + bpf_map__type(map) != BPF_MAP_TYPE_PERCPU_ARRAY))
>
> there will be no BPF_F_MMAPABLE set for PERCPU_ARRAY, why adding these
> unnecessary extra conditionals?
>
Ack. It's unnecessary.
>> return false;
>>
>> if (!get_map_ident(map, buf, sz))
>> @@ -903,7 +905,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
>> i, bpf_map__name(map), ident);
>> /* memory-mapped internal maps */
>> if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
>> - printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
>> + if (bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY)
>> + printf("\tmap->data = (void **)&obj->%s;\n", ident);
>> + else
>> + printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
>> }
>>
>> if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) {
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index 6da6004c5c84d..dafb419bc5b86 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -13915,6 +13915,7 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
>> struct bpf_map **map = map_skel->map;
>> const char *name = map_skel->name;
>> void **mmaped = map_skel->mmaped;
>> + void **data = map_skel->data;
>>
>> *map = bpf_object__find_map_by_name(obj, name);
>> if (!*map) {
>> @@ -13925,6 +13926,8 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
>> /* externs shouldn't be pre-setup from user code */
>> if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
>> *mmaped = (*map)->mmaped;
>> + if (data && (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
>> + *data = (*map)->data;
>> }
>> return 0;
>> }
>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
>> index 3020ee45303a0..c49d6e44b5630 100644
>> --- a/tools/lib/bpf/libbpf.h
>> +++ b/tools/lib/bpf/libbpf.h
>> @@ -1701,6 +1701,7 @@ struct bpf_map_skeleton {
>> const char *name;
>> struct bpf_map **map;
>> void **mmaped;
>> + void **data;
>
> this is breaking backwards/forward compatibility. let's try to reuse mmaped
>
Indeed. If we reuse map->mmaped for global percpu variables, there's no
need to add this "data" field.
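Roughly (sketch only, untested), populate_skeleton_maps() could then
keep the single mmaped pointer and do:

	/* externs shouldn't be pre-setup from user code */
	if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
		*mmaped = (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA ?
			  (*map)->data : (*map)->mmaped;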
>> struct bpf_link **link;
>> };
>>
>> --
>> 2.47.1
>>
Thanks,
Leon
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
` (2 preceding siblings ...)
2025-01-27 16:21 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
@ 2025-01-27 16:21 ` Leon Hwang
2025-02-06 0:09 ` Andrii Nakryiko
3 siblings, 1 reply; 20+ messages in thread
From: Leon Hwang @ 2025-01-27 16:21 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
If the arch, like s390x, does not support the percpu insn, this case
skips testing global percpu data, by checking for -EOPNOTSUPP when
loading the prog.
The following APIs have been tested for global percpu data:
1. bpf_map__set_initial_value()
2. bpf_map__initial_value()
3. generated percpu struct pointer that points to internal map's data
4. bpf_map__lookup_elem() for global percpu data map
cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
124 global_percpu_data_init:OK
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
.../bpf/prog_tests/global_data_init.c | 89 ++++++++++++++++++-
.../bpf/progs/test_global_percpu_data.c | 21 +++++
2 files changed, 109 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 8466332d7406f..a5d0890444f67 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_global_percpu_data.skel.h"
void test_global_data_init(void)
{
@@ -8,7 +9,7 @@ void test_global_data_init(void)
__u8 *buff = NULL, *newval = NULL;
struct bpf_object *obj;
struct bpf_map *map;
- __u32 duration = 0;
+ __u32 duration = 0;
size_t sz;
obj = bpf_object__open_file(file, NULL);
@@ -60,3 +61,89 @@ void test_global_data_init(void)
free(newval);
bpf_object__close(obj);
}
+
+void test_global_percpu_data_init(void)
+{
+ struct test_global_percpu_data *skel = NULL;
+ u64 *percpu_data = NULL;
+ struct bpf_map *map;
+ size_t init_data_sz;
+ char buff[128] = {};
+ int init_value = 2;
+ int key, value_sz;
+ int prog_fd, err;
+ int *init_data;
+ int num_cpus;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ num_cpus = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ percpu_data = calloc(num_cpus, sizeof(*percpu_data));
+ if (!ASSERT_FALSE(percpu_data == NULL, "calloc percpu_data"))
+ return;
+
+ value_sz = sizeof(*percpu_data) * num_cpus;
+ memset(percpu_data, 0, value_sz);
+
+ skel = test_global_percpu_data__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
+ goto out;
+
+ ASSERT_EQ(skel->percpu->percpu_data, -1, "skel->percpu->percpu_data");
+
+ map = skel->maps.percpu;
+ err = bpf_map__set_initial_value(map, &init_value,
+ sizeof(init_value));
+ if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
+ goto out;
+
+ init_data = bpf_map__initial_value(map, &init_data_sz);
+ if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
+ goto out;
+
+ ASSERT_EQ(*init_data, init_value, "initial_value");
+ ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
+
+ if (!ASSERT_EQ((void *) init_data, (void *) skel->percpu, "skel->percpu"))
+ goto out;
+ ASSERT_EQ(skel->percpu->percpu_data, init_value, "skel->percpu->percpu_data");
+
+ err = test_global_percpu_data__load(skel);
+ if (err == -EOPNOTSUPP)
+ goto out;
+ if (!ASSERT_OK(err, "test_global_percpu_data__load"))
+ goto out;
+
+ ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY,
+ "bpf_map__type");
+
+ prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "update_percpu_data");
+ ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
+
+ key = 0;
+ err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
+ value_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
+ goto out;
+
+ if (!ASSERT_LT(skel->bss->curr_cpu, num_cpus, "curr_cpu"))
+ goto out;
+ ASSERT_EQ((int) percpu_data[skel->bss->curr_cpu], 1, "percpu_data");
+ if (num_cpus > 1)
+ ASSERT_EQ((int) percpu_data[(skel->bss->curr_cpu+1)%num_cpus],
+ init_value, "init_value");
+
+out:
+ test_global_percpu_data__destroy(skel);
+ if (percpu_data)
+ free(percpu_data);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
new file mode 100644
index 0000000000000..731c3214b0bb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+
+int percpu_data SEC(".percpu") = -1;
+int curr_cpu;
+
+SEC("tc")
+int update_percpu_data(struct __sk_buff *skb)
+{
+ curr_cpu = bpf_get_smp_processor_id();
+ percpu_data = 1;
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.47.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-01-27 16:21 ` [PATCH bpf-next 4/4] selftests/bpf: Add a case to test " Leon Hwang
@ 2025-02-06 0:09 ` Andrii Nakryiko
2025-02-07 10:00 ` Leon Hwang
0 siblings, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2025-02-06 0:09 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> If the arch, like s390x, does not support percpu insn, this case won't
> test global percpu data by checking -EOPNOTSUPP when load prog.
>
> The following APIs have been tested for global percpu data:
> 1. bpf_map__set_initial_value()
> 2. bpf_map__initial_value()
> 3. generated percpu struct pointer that points to internal map's data
> 4. bpf_map__lookup_elem() for global percpu data map
>
> cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
> 124 global_percpu_data_init:OK
> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> .../bpf/prog_tests/global_data_init.c | 89 ++++++++++++++++++-
> .../bpf/progs/test_global_percpu_data.c | 21 +++++
> 2 files changed, 109 insertions(+), 1 deletion(-)
> create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> index 8466332d7406f..a5d0890444f67 100644
> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> @@ -1,5 +1,6 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <test_progs.h>
> +#include "test_global_percpu_data.skel.h"
>
> void test_global_data_init(void)
> {
> @@ -8,7 +9,7 @@ void test_global_data_init(void)
> __u8 *buff = NULL, *newval = NULL;
> struct bpf_object *obj;
> struct bpf_map *map;
> - __u32 duration = 0;
> + __u32 duration = 0;
> size_t sz;
>
> obj = bpf_object__open_file(file, NULL);
> @@ -60,3 +61,89 @@ void test_global_data_init(void)
> free(newval);
> bpf_object__close(obj);
> }
> +
> +void test_global_percpu_data_init(void)
> +{
> + struct test_global_percpu_data *skel = NULL;
> + u64 *percpu_data = NULL;
there is that test_global_percpu_data__percpu type you are declaring
in the BPF skeleton, right? We should try using it here.
And for that array access, we should make sure that it's __aligned(8),
so indexing by CPU index works correctly.
Also, you define the per-CPU variable as an int, but here it is u64;
what's up with that?
> + struct bpf_map *map;
> + size_t init_data_sz;
> + char buff[128] = {};
> + int init_value = 2;
> + int key, value_sz;
> + int prog_fd, err;
> + int *init_data;
> + int num_cpus;
> +
nit: LIBBPF_OPTS below is a variable declaration, so there shouldn't be
an empty line here (and maybe group those int variables a bit more
tightly?)
> + LIBBPF_OPTS(bpf_test_run_opts, topts,
> + .data_in = buff,
> + .data_size_in = sizeof(buff),
> + .repeat = 1,
> + );
> +
> + num_cpus = libbpf_num_possible_cpus();
> + if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
> + return;
> +
> + percpu_data = calloc(num_cpus, sizeof(*percpu_data));
> + if (!ASSERT_FALSE(percpu_data == NULL, "calloc percpu_data"))
ASSERT_OK_PTR()
> + return;
> +
> + value_sz = sizeof(*percpu_data) * num_cpus;
> + memset(percpu_data, 0, value_sz);
you calloc()'ed it, so it's already zero-initialized
> +
> + skel = test_global_percpu_data__open();
> + if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
> + goto out;
> +
> + ASSERT_EQ(skel->percpu->percpu_data, -1, "skel->percpu->percpu_data");
> +
> + map = skel->maps.percpu;
> + err = bpf_map__set_initial_value(map, &init_value,
> + sizeof(init_value));
> + if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
> + goto out;
> +
> + init_data = bpf_map__initial_value(map, &init_data_sz);
> + if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
> + goto out;
> +
> + ASSERT_EQ(*init_data, init_value, "initial_value");
> + ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
> +
> + if (!ASSERT_EQ((void *) init_data, (void *) skel->percpu, "skel->percpu"))
> + goto out;
> + ASSERT_EQ(skel->percpu->percpu_data, init_value, "skel->percpu->percpu_data");
> +
> + err = test_global_percpu_data__load(skel);
> + if (err == -EOPNOTSUPP)
> + goto out;
> + if (!ASSERT_OK(err, "test_global_percpu_data__load"))
> + goto out;
> +
> + ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY,
> + "bpf_map__type");
> +
> + prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
> + err = bpf_prog_test_run_opts(prog_fd, &topts);
at least one of the BPF program types (don't remember which one, could
be raw_tp) supports specifying the CPU index to run on; it would be nice
to loop over CPUs, triggering the BPF program on each one and filling
the per-CPU variable with the current CPU index. Then we can check that
all per-CPU values have the expected values.
> + ASSERT_OK(err, "update_percpu_data");
> + ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
> +
> + key = 0;
> + err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
> + value_sz, 0);
> + if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
> + goto out;
> +
> + if (!ASSERT_LT(skel->bss->curr_cpu, num_cpus, "curr_cpu"))
> + goto out;
> + ASSERT_EQ((int) percpu_data[skel->bss->curr_cpu], 1, "percpu_data");
> + if (num_cpus > 1)
> + ASSERT_EQ((int) percpu_data[(skel->bss->curr_cpu+1)%num_cpus],
> + init_value, "init_value");
> +
> +out:
> + test_global_percpu_data__destroy(skel);
> + if (percpu_data)
> + free(percpu_data);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> new file mode 100644
> index 0000000000000..731c3214b0bb4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> @@ -0,0 +1,21 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Leon Hwang */
> +
> +#include <linux/bpf.h>
> +#include <linux/pkt_cls.h>
> +
> +#include <bpf/bpf_helpers.h>
> +
> +int percpu_data SEC(".percpu") = -1;
> +int curr_cpu;
> +
> +SEC("tc")
> +int update_percpu_data(struct __sk_buff *skb)
> +{
> + curr_cpu = bpf_get_smp_processor_id();
> + percpu_data = 1;
> +
> + return TC_ACT_OK;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.47.1
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-02-06 0:09 ` Andrii Nakryiko
@ 2025-02-07 10:00 ` Leon Hwang
2025-02-07 19:48 ` Andrii Nakryiko
0 siblings, 1 reply; 20+ messages in thread
From: Leon Hwang @ 2025-02-07 10:00 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 6/2/25 08:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> If the arch, like s390x, does not support percpu insn, this case won't
>> test global percpu data by checking -EOPNOTSUPP when load prog.
>>
>> The following APIs have been tested for global percpu data:
>> 1. bpf_map__set_initial_value()
>> 2. bpf_map__initial_value()
>> 3. generated percpu struct pointer that points to internal map's data
>> 4. bpf_map__lookup_elem() for global percpu data map
>>
>> cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
>> 124 global_percpu_data_init:OK
>> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>> .../bpf/prog_tests/global_data_init.c | 89 ++++++++++++++++++-
>> .../bpf/progs/test_global_percpu_data.c | 21 +++++
>> 2 files changed, 109 insertions(+), 1 deletion(-)
>> create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> index 8466332d7406f..a5d0890444f67 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> @@ -1,5 +1,6 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <test_progs.h>
>> +#include "test_global_percpu_data.skel.h"
>>
>> void test_global_data_init(void)
>> {
>> @@ -8,7 +9,7 @@ void test_global_data_init(void)
>> __u8 *buff = NULL, *newval = NULL;
>> struct bpf_object *obj;
>> struct bpf_map *map;
>> - __u32 duration = 0;
>> + __u32 duration = 0;
>> size_t sz;
>>
>> obj = bpf_object__open_file(file, NULL);
>> @@ -60,3 +61,89 @@ void test_global_data_init(void)
>> free(newval);
>> bpf_object__close(obj);
>> }
>> +
>> +void test_global_percpu_data_init(void)
>> +{
>> + struct test_global_percpu_data *skel = NULL;
>> + u64 *percpu_data = NULL;
>
> there is that test_global_percpu_data__percpu type you are declaring
> in the BPF skeleton, right? We should try using it here.
>
No. bpftool does not generate test_global_percpu_data__percpu. The
struct for global variables is embedded into the skeleton struct.
Should we generate a type for global variables?
> And for that array access, we should make sure that it's __aligned(8),
> so indexing by CPU index works correctly.
>
Ack.
> Also, you define per-CPU variable as int, but here it is u64, what's
> up with that?
>
Like __aligned(8), it's to make sure the value is 8-byte aligned. It's
better to use __aligned(8).
>> + struct bpf_map *map;
>> + size_t init_data_sz;
>> + char buff[128] = {};
>> + int init_value = 2;
>> + int key, value_sz;
>> + int prog_fd, err;
>> + int *init_data;
>> + int num_cpus;
>> +
>
> nit: LIBBPF_OPTS below is variable declaration, so there shouldn't be
> an empty line here (and maybe group those int variables a bit more
> tightly?)
>
Ack.
>> + LIBBPF_OPTS(bpf_test_run_opts, topts,
>> + .data_in = buff,
>> + .data_size_in = sizeof(buff),
>> + .repeat = 1,
>> + );
>> +
>> + num_cpus = libbpf_num_possible_cpus();
>> + if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
>> + return;
>> +
>> + percpu_data = calloc(num_cpus, sizeof(*percpu_data));
>> + if (!ASSERT_FALSE(percpu_data == NULL, "calloc percpu_data"))
>
> ASSERT_OK_PTR()
>
Ack.
>> + return;
>> +
>> + value_sz = sizeof(*percpu_data) * num_cpus;
>> + memset(percpu_data, 0, value_sz);
>
> you calloc()'ed it, it's already zero-initialized
>
Ack. Thanks. I should have checked "man calloc" before using it.
>
>> +
>> + skel = test_global_percpu_data__open();
>> + if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
>> + goto out;
>> +
>> + ASSERT_EQ(skel->percpu->percpu_data, -1, "skel->percpu->percpu_data");
>> +
>> + map = skel->maps.percpu;
>> + err = bpf_map__set_initial_value(map, &init_value,
>> + sizeof(init_value));
>> + if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
>> + goto out;
>> +
>> + init_data = bpf_map__initial_value(map, &init_data_sz);
>> + if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
>> + goto out;
>> +
>> + ASSERT_EQ(*init_data, init_value, "initial_value");
>> + ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
>> +
>> + if (!ASSERT_EQ((void *) init_data, (void *) skel->percpu, "skel->percpu"))
>> + goto out;
>> + ASSERT_EQ(skel->percpu->percpu_data, init_value, "skel->percpu->percpu_data");
>> +
>> + err = test_global_percpu_data__load(skel);
>> + if (err == -EOPNOTSUPP)
>> + goto out;
>> + if (!ASSERT_OK(err, "test_global_percpu_data__load"))
>> + goto out;
>> +
>> + ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY,
>> + "bpf_map__type");
>> +
>> + prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
>> + err = bpf_prog_test_run_opts(prog_fd, &topts);
>
> at least one of BPF programs (don't remember which one, could be
> raw_tp) supports specifying CPU index to run on, it would be nice to
> loop over CPUs, triggering BPF program on each one and filling per-CPU
> variable with current CPU index. Then we can check that all per-CPU
> values have expected values.
>
Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
Your suggestion looks good to me. I'll do it.
>
>> + ASSERT_OK(err, "update_percpu_data");
>> + ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
>> +
>> + key = 0;
>> + err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
>> + value_sz, 0);
>> + if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
>> + goto out;
>> +
[...]
Thanks,
Leon
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-02-07 10:00 ` Leon Hwang
@ 2025-02-07 19:48 ` Andrii Nakryiko
2025-02-10 9:52 ` Leon Hwang
0 siblings, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2025-02-07 19:48 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 6/2/25 08:09, Andrii Nakryiko wrote:
> > On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >> If the arch, like s390x, does not support percpu insn, this case won't
> >> test global percpu data by checking -EOPNOTSUPP when load prog.
> >>
> >> The following APIs have been tested for global percpu data:
> >> 1. bpf_map__set_initial_value()
> >> 2. bpf_map__initial_value()
> >> 3. generated percpu struct pointer that points to internal map's data
> >> 4. bpf_map__lookup_elem() for global percpu data map
> >>
> >> cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
> >> 124 global_percpu_data_init:OK
> >> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
> >>
> >> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> >> ---
> >> .../bpf/prog_tests/global_data_init.c | 89 ++++++++++++++++++-
> >> .../bpf/progs/test_global_percpu_data.c | 21 +++++
> >> 2 files changed, 109 insertions(+), 1 deletion(-)
> >> create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> >>
[...]
> >> +void test_global_percpu_data_init(void)
> >> +{
> >> + struct test_global_percpu_data *skel = NULL;
> >> + u64 *percpu_data = NULL;
> >
> > there is that test_global_percpu_data__percpu type you are declaring
> > in the BPF skeleton, right? We should try using it here.
> >
>
> No. bpftool does not generate test_global_percpu_data__percpu. The
> struct for global variables is embedded into skeleton struct.
>
> Should we generate type for global variables?
we already have custom skeleton-specific types for .data, .rodata, and
.bss, so we should provide one for .percpu as well, yes
>
> > And for that array access, we should make sure that it's __aligned(8),
> > so indexing by CPU index works correctly.
> >
>
> Ack.
>
> > Also, you define per-CPU variable as int, but here it is u64, what's
> > up with that?
> >
>
> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
> __aligned(8).
It's hacky, and it won't work correctly on big-endian architectures.
But you shouldn't need that if we have a struct representing this
.percpu memory image. Just make sure that struct has 8-byte alignment
(from the bpftool side during skeleton generation, probably).
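Something along these lines in the generated header (sketch only; the
type and field names follow this selftest, and the aligned attribute
would be emitted by bpftool):

struct test_global_percpu_data__percpu {
	int percpu_data;
} __attribute__((aligned(8)));

struct test_global_percpu_data {
	struct bpf_object_skeleton *skeleton;
	struct bpf_object *obj;
	/* ... */
	struct test_global_percpu_data__percpu *percpu;
	/* ... */
};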
[...]
> > at least one of BPF programs (don't remember which one, could be
> > raw_tp) supports specifying CPU index to run on, it would be nice to
> > loop over CPUs, triggering BPF program on each one and filling per-CPU
> > variable with current CPU index. Then we can check that all per-CPU
> > values have expected values.
> >
>
> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
>
No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
a selftest using it, so you can work backwards from that.
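Roughly like this (sketch; assumes the prog is switched to a type whose
test_run supports the cpu field, e.g. raw_tp, and uses the
BPF_F_TEST_RUN_ON_CPU flag):

	LIBBPF_OPTS(bpf_test_run_opts, topts,
		.flags = BPF_F_TEST_RUN_ON_CPU,
	);
	int cpu, err;

	for (cpu = 0; cpu < num_cpus; cpu++) {
		/* pin this test_run invocation to one CPU at a time */
		topts.cpu = cpu;
		err = bpf_prog_test_run_opts(
			bpf_program__fd(skel->progs.update_percpu_data),
			&topts);
		if (!ASSERT_OK(err, "test_run_opts on cpu"))
			break;
	}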
> Your suggestion looks good to me. I'll do it.
>
> >
> >> + ASSERT_OK(err, "update_percpu_data");
> >> + ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
> >> +
> >> + key = 0;
> >> + err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
> >> + value_sz, 0);
> >> + if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
> >> + goto out;
> >> +
>
> [...]
>
> Thanks,
> Leon
>
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-02-07 19:48 ` Andrii Nakryiko
@ 2025-02-10 9:52 ` Leon Hwang
2025-02-11 0:15 ` Andrii Nakryiko
0 siblings, 1 reply; 20+ messages in thread
From: Leon Hwang @ 2025-02-10 9:52 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 8/2/25 03:48, Andrii Nakryiko wrote:
> On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 6/2/25 08:09, Andrii Nakryiko wrote:
>>> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>>>
[...]
>>>> +void test_global_percpu_data_init(void)
>>>> +{
>>>> + struct test_global_percpu_data *skel = NULL;
>>>> + u64 *percpu_data = NULL;
>>>
>>> there is that test_global_percpu_data__percpu type you are declaring
>>> in the BPF skeleton, right? We should try using it here.
>>>
>>
>> No. bpftool does not generate test_global_percpu_data__percpu. The
>> struct for global variables is embedded into skeleton struct.
>>
>> Should we generate type for global variables?
>
> we already have custom skeleton-specific type for .data, .rodata,
> .bss, so we should provide one for .percpu as well, yes
>
Yes, I've generated it. But we should not add '__aligned(8)' to it, or
bpf_map__set_initial_value() will fail because the aligned size differs
from the actual spec's value size.
If the actual value size is not __aligned(8), how should we look up an
element from the percpu_array map?
The doc[0] does not provide a good practice for this case.
[0] https://docs.kernel.org/bpf/map_array.html#bpf-map-type-percpu-array
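For now the test sizes the per-CPU lookup buffer with an 8-byte-rounded
stride per CPU, which is what per-CPU lookups expect anyway (sketch;
elem_sz here stands for the map's value size):

	size_t stride = (elem_sz + 7) & ~(size_t)7;	/* round up to 8 */
	void *values = calloc(num_cpus, stride);

	/* each CPU's value is copied out at an 8-byte-aligned stride */
	err = bpf_map__lookup_elem(map, &key, sizeof(key),
				   values, stride * num_cpus, 0);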
>>
>>> And for that array access, we should make sure that it's __aligned(8),
>>> so indexing by CPU index works correctly.
>>>
>>
>> Ack.
>>
>>> Also, you define per-CPU variable as int, but here it is u64, what's
>>> up with that?
>>>
>>
>> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
>> __aligned(8).
>
> It's hacky, and it won't work correctly on big-endian architectures.
> But you shouldn't need that if we have a struct representing this
> .percpu memory image. Just make sure that struct has 8 byte alignment
> (from bpftool side during skeleton generation, probably).
>
> [...]
>
>>> at least one of BPF programs (don't remember which one, could be
>>> raw_tp) supports specifying CPU index to run on, it would be nice to
>>> loop over CPUs, triggering BPF program on each one and filling per-CPU
>>> variable with current CPU index. Then we can check that all per-CPU
>>> values have expected values.
>>>
>>
>> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
>>
>
> No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
> a selftest using it, so you can work backwards from that.
>
Referencing the raw_tp selftest, which uses `opts.cpu`: if I use that
approach to test global percpu data, it will fail to run on non-zero
CPUs, because bpf_prog_test_run_skb() disallows setting `opts.cpu`.
Alternatively, with `setaffinity` like perf_buffer.c::trigger_on_cpu(),
it's OK to run the added selftests on all CPUs.
So, should I use `setaffinity`, or change the BPF prog type from tc to
raw_tp so that I can use `opts.cpu`?
Thanks,
Leon
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-02-10 9:52 ` Leon Hwang
@ 2025-02-11 0:15 ` Andrii Nakryiko
2025-02-12 1:50 ` Leon Hwang
0 siblings, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2025-02-11 0:15 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Mon, Feb 10, 2025 at 1:52 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 8/2/25 03:48, Andrii Nakryiko wrote:
> > On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >>
> >>
> >> On 6/2/25 08:09, Andrii Nakryiko wrote:
> >>> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>>>
>
> [...]
>
> >>>> +void test_global_percpu_data_init(void)
> >>>> +{
> >>>> + struct test_global_percpu_data *skel = NULL;
> >>>> + u64 *percpu_data = NULL;
> >>>
> >>> there is that test_global_percpu_data__percpu type you are declaring
> >>> in the BPF skeleton, right? We should try using it here.
> >>>
> >>
> >> No. bpftool does not generate test_global_percpu_data__percpu. The
> >> struct for global variables is embedded into skeleton struct.
> >>
> >> Should we generate type for global variables?
> >
> > we already have custom skeleton-specific type for .data, .rodata,
> > .bss, so we should provide one for .percpu as well, yes
> >
>
> Yes, I've generated it. But it should not add '__aligned(8)' to it. Or
> bpf_map__set_initial_value() will fails because the aligned size is
> different from the actual spec's value size.
>
> If the actual value size is not __aligned(8), how should we lookup
> element from percpu_array map?
for .percpu, libbpf can ensure that the map is created with a correct
value size that matches the __aligned(8) size? A non-multiple-of-8 size
is an error-prone corner case anyway (for per-CPU data), so just prevent
the issue altogether?...
>
> The doc[0] does not provide a good practice for this case.
>
> [0] https://docs.kernel.org/bpf/map_array.html#bpf-map-type-percpu-array
>
> >>
> >>> And for that array access, we should make sure that it's __aligned(8),
> >>> so indexing by CPU index works correctly.
> >>>
> >>
> >> Ack.
> >>
> >>> Also, you define per-CPU variable as int, but here it is u64, what's
> >>> up with that?
> >>>
> >>
> >> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
> >> __aligned(8).
> >
> > It's hacky, and it won't work correctly on big-endian architectures.
> > But you shouldn't need that if we have a struct representing this
> > .percpu memory image. Just make sure that struct has 8 byte alignment
> > (from bpftool side during skeleton generation, probably).
> >
> > [...]
> >
> >>> at least one of BPF programs (don't remember which one, could be
> >>> raw_tp) supports specifying CPU index to run on, it would be nice to
> >>> loop over CPUs, triggering BPF program on each one and filling per-CPU
> >>> variable with current CPU index. Then we can check that all per-CPU
> >>> values have expected values.
> >>>
> >>
> >> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
> >>
> >
> > No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
> > a selftest using it, so you can work backwards from that.
> >
>
> By referencing raw_tp, which uses `opts.cpu`, if use it to test global
> percpu data, it will fail to test on non-zero CPU, because
> bpf_prog_test_run_skb() disallows setting `opts.cpu`.
>
> Then, when `setaffinity` like perf_buffer.c::trigger_on_cpu(), it's OK
> to run the adding selftests on all CPUs.
>
> So, should I use `setaffinity` or change the bpf prog type from tc to
> raw_tp to use `opts.cpu`?
Is it a problem to use raw_tp (we do it a lot)? If not, I'd go with
raw_tp and opts.cpu.
>
> Thanks,
> Leon
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf-next 4/4] selftests/bpf: Add a case to test global percpu data
2025-02-11 0:15 ` Andrii Nakryiko
@ 2025-02-12 1:50 ` Leon Hwang
0 siblings, 0 replies; 20+ messages in thread
From: Leon Hwang @ 2025-02-12 1:50 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 11/2/25 08:15, Andrii Nakryiko wrote:
> On Mon, Feb 10, 2025 at 1:52 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 8/2/25 03:48, Andrii Nakryiko wrote:
>>> On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
[...]
>>
>> Yes, I've generated it. But it should not add '__aligned(8)' to it. Or
>> bpf_map__set_initial_value() will fails because the aligned size is
>> different from the actual spec's value size.
>>
>> If the actual value size is not __aligned(8), how should we lookup
>> element from percpu_array map?
>
> for .percpu libbpf can ensure that map is created with correct value
> size that matches __aligned(8) size? It's an error-prone corner case
> to non-multiple-of-8 size anyways (for per-CPU data), so just prevent
> the issue altogether?...
>
Ack.
I'll update the value size of .percpu maps to match the __aligned(8)
size, and add '__aligned(8)' to the generated .percpu types.
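e.g. something like this when libbpf creates the internal .percpu map
(sketch only; where exactly it lands in libbpf.c is an implementation
detail):

	if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
		/* per-CPU values are laid out at an 8-byte stride, so
		 * round the value size up to match __aligned(8)
		 */
		def->value_size = (data_sz + 7) & ~(size_t)7;
	else
		def->value_size = data_sz;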
>>
>> The doc[0] does not provide a good practice for this case.
>>
>> [0] https://docs.kernel.org/bpf/map_array.html#bpf-map-type-percpu-array
>>
>>>>
>>>>> And for that array access, we should make sure that it's __aligned(8),
>>>>> so indexing by CPU index works correctly.
>>>>>
>>>>
>>>> Ack.
>>>>
>>>>> Also, you define per-CPU variable as int, but here it is u64, what's
>>>>> up with that?
>>>>>
>>>>
>>>> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
>>>> __aligned(8).
>>>
>>> It's hacky, and it won't work correctly on big-endian architectures.
>>> But you shouldn't need that if we have a struct representing this
>>> .percpu memory image. Just make sure that struct has 8 byte alignment
>>> (from bpftool side during skeleton generation, probably).
>>>
>>> [...]
>>>
>>>>> at least one of BPF programs (don't remember which one, could be
>>>>> raw_tp) supports specifying CPU index to run on, it would be nice to
>>>>> loop over CPUs, triggering BPF program on each one and filling per-CPU
>>>>> variable with current CPU index. Then we can check that all per-CPU
>>>>> values have expected values.
>>>>>
>>>>
>>>> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
>>>>
>>>
>>> No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
>>> a selftest using it, so you can work backwards from that.
>>>
>>
>> By referencing raw_tp, which uses `opts.cpu`, if use it to test global
>> percpu data, it will fail to test on non-zero CPU, because
>> bpf_prog_test_run_skb() disallows setting `opts.cpu`.
>>
>> Then, when `setaffinity` like perf_buffer.c::trigger_on_cpu(), it's OK
>> to run the adding selftests on all CPUs.
>>
>> So, should I use `setaffinity` or change the bpf prog type from tc to
>> raw_tp to use `opts.cpu`?
>
> Is it a problem to use raw_tp (we do it a lot)? If not, I'd go with
> raw_tp and opts.cpu.
>
There's no problem using raw_tp. Let's use raw_tp and opts.cpu.
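The reworked BPF side would then look roughly like this (sketch; the
raw_tp hook name is just an example, and it fills the per-CPU slot with
the CPU id as suggested above):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

int percpu_data SEC(".percpu") = -1;

SEC("raw_tp/sys_enter")
int update_percpu_data(void *ctx)
{
	/* record which CPU this invocation ran on */
	percpu_data = bpf_get_smp_processor_id();
	return 0;
}

char _license[] SEC("license") = "GPL";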
Thanks,
Leon
^ permalink raw reply [flat|nested] 20+ messages in thread