* [PATCH bpf-next v4 1/8] bpf: Drop duplicate blank lines in verifier
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data Leon Hwang
` (6 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
There are many adjacent blank lines in the verifier that have accumulated
over time.
Drop them for cleanup.
No functional changes intended.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
kernel/bpf/verifier.c | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e4980128151..1757c5720503 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -513,7 +513,6 @@ static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
return ref_obj_uses > 1;
}
-
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
@@ -619,7 +618,6 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg,
enum bpf_dynptr_type type,
bool first_slot, int dynptr_id);
-
static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
struct bpf_reg_state *sreg1,
struct bpf_reg_state *sreg2,
@@ -1655,7 +1653,6 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
return true;
}
-
void bpf_free_backedges(struct bpf_scc_visit *visit)
{
struct bpf_scc_backedge *backedge, *next;
@@ -2625,7 +2622,6 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
return &elem->st;
}
-
static int cmp_subprogs(const void *a, const void *b)
{
return ((struct bpf_subprog_info *)a)->start -
@@ -3679,7 +3675,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
}
}
-
/* check if register is a constant scalar value */
static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
{
@@ -4347,7 +4342,6 @@ static int check_stack_read(struct bpf_verifier_env *env,
return err;
}
-
/* check_stack_write dispatches to check_stack_write_fixed_off or
* check_stack_write_var_off.
*
@@ -5033,7 +5027,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
valid = false;
}
-
if (valid) {
env->insn_aux_data[insn_idx].ctx_field_size =
info.ctx_field_size;
@@ -6836,7 +6829,6 @@ static int check_stack_range_initialized(
if (err)
return err;
-
if (tnum_is_const(reg->var_off)) {
min_off = max_off = reg->var_off.value + off;
} else {
@@ -7541,7 +7533,6 @@ static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_ITER_NEW;
}
-
static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_ITER_DESTROY;
@@ -10459,7 +10450,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (!reg)
return -EFAULT;
-
if (meta.dynptr_id) {
verifier_bug(env, "meta.dynptr_id already set");
return -EFAULT;
@@ -10798,7 +10788,6 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RELEASE;
}
-
static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_DESTRUCTIVE;
@@ -11571,7 +11560,6 @@ static int process_irq_flag(struct bpf_verifier_env *env, int regno,
return 0;
}
-
static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct btf_record *rec = reg_btf_record(reg);
@@ -16715,7 +16703,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
-
static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
@@ -18587,8 +18574,6 @@ static void release_insn_arrays(struct bpf_verifier_env *env)
bpf_insn_array_release(env->insn_array_maps[i]);
}
-
-
/* The verifier does more data flow analysis than llvm and will not
* explore branches that are dead at run time. Malicious programs can
* have dead code too. Therefore replace all dead at-run-time code
@@ -18616,8 +18601,6 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
}
}
-
-
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl;
@@ -18880,7 +18863,6 @@ static int do_check_main(struct bpf_verifier_env *env)
return ret;
}
-
static void print_verification_stats(struct bpf_verifier_env *env)
{
int i;
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 1/8] bpf: Drop duplicate blank lines in verifier Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 14:10 ` bot+bpf-ci
2026-04-14 13:24 ` [PATCH bpf-next v4 3/8] libbpf: Probe percpu data feature Leon Hwang
` (5 subsequent siblings)
7 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
Introduce global percpu data, inspired by the commit
6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
definition of global percpu variables in BPF, similar to the
include/linux/percpu-defs.h::DEFINE_PER_CPU() macro.
For example, in BPF, a global percpu variable can be defined like:
int data SEC(".percpu");
With this patch, tools like retsnoop [1] and bpfsnoop [2] can simplify
their BPF code for handling LBRs. The code can be updated from
static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");
to
static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".percpu.lbrs");
This eliminates the need to retrieve the CPU ID using the
bpf_get_smp_processor_id() helper.
Additionally, by reusing the global percpu data map, sharing information
between tail callers and callees, or between freplace callers and callees,
becomes simpler compared to reusing percpu_array maps.
Links:
[1] https://github.com/anakryiko/retsnoop
[2] https://github.com/bpfsnoop/bpfsnoop
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
kernel/bpf/arraymap.c | 41 +++++++++++++++++++++++++++++++++++++++--
kernel/bpf/fixups.c | 31 +++++++++++++++++++++++++++++++
kernel/bpf/verifier.c | 9 ++++++++-
3 files changed, 78 insertions(+), 3 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5e25e0353509..b85ac59d808c 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -261,6 +261,40 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
+static int percpu_array_map_direct_value_addr(const struct bpf_map *map,
+ u64 *imm, u32 off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ if (map->max_entries != 1)
+ return -EOPNOTSUPP;
+ if (off >= map->value_size)
+ return -EINVAL;
+ if (!bpf_jit_supports_percpu_insn())
+ return -EOPNOTSUPP;
+
+ *imm = (u64) array->pptrs[0];
+ return 0;
+}
+
+static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
+ u64 imm, u32 *off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u64 base = (u64) array->pptrs[0];
+ u64 range = array->elem_size;
+
+ if (map->max_entries != 1)
+ return -EOPNOTSUPP;
+ if (imm < base || imm >= base + range)
+ return -ENOENT;
+ if (!bpf_jit_supports_percpu_insn())
+ return -EOPNOTSUPP;
+
+ *off = imm - base;
+ return 0;
+}
+
/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
@@ -553,9 +587,10 @@ static int array_map_check_btf(struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- /* One exception for keyless BTF: .bss/.data/.rodata map */
+ /* One exception for keyless BTF: .bss/.data/.rodata/.percpu map */
if (btf_type_is_void(key_type)) {
- if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+ if ((map->map_type != BPF_MAP_TYPE_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) ||
map->max_entries != 1)
return -EINVAL;
@@ -834,6 +869,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_get_next_key = bpf_array_get_next_key,
.map_lookup_elem = percpu_array_map_lookup_elem,
.map_gen_lookup = percpu_array_map_gen_lookup,
+ .map_direct_value_addr = percpu_array_map_direct_value_addr,
+ .map_direct_value_meta = percpu_array_map_direct_value_meta,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c
index 67c9b28767e1..5f16fd4e0461 100644
--- a/kernel/bpf/fixups.c
+++ b/kernel/bpf/fixups.c
@@ -1682,6 +1682,37 @@ int bpf_do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}
+ if (bpf_jit_supports_percpu_insn() &&
+ insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
+ struct bpf_map *map;
+
+ aux = &env->insn_aux_data[i + delta];
+ map = env->used_maps[aux->map_index];
+ if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+ goto next_insn;
+
+ /*
+ * Reuse the original ld_imm64 insn, and add one
+ * mov64_percpu_reg insn.
+ */
+
+ insn_buf[0] = insn[1];
+ insn_buf[1] = BPF_MOV64_PERCPU_REG(insn->dst_reg, insn->dst_reg);
+ cnt = 2;
+
+ i++;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
if (insn->code != (BPF_JMP | BPF_CALL))
goto next_insn;
if (insn->src_reg == BPF_PSEUDO_CALL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1757c5720503..8ec24d63a2c4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6362,7 +6362,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (tnum_is_const(reg->var_off) &&
bpf_map_is_rdonly(map) &&
map->ops->map_direct_value_addr &&
- map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
+ map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
int map_off = off + reg->var_off.value;
u64 val = 0;
@@ -8358,6 +8359,12 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
return -EACCES;
}
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+ verbose(env, "R%d points to percpu_array map which cannot be used as const string\n",
+ regno);
+ return -EACCES;
+ }
+
if (!bpf_map_is_rdonly(map)) {
verbose(env, "R%d does not point to a readonly map'\n", regno);
return -EACCES;
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-14 13:24 ` [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data Leon Hwang
@ 2026-04-14 14:10 ` bot+bpf-ci
2026-04-14 14:19 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: bot+bpf-ci @ 2026-04-14 14:10 UTC (permalink / raw)
To: leon.hwang, bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot, ast, andrii, daniel, martin.lau,
eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2121 bytes --]
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 1757c5720503..8ec24d63a2c4 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6362,7 +6362,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> if (tnum_is_const(reg->var_off) &&
> bpf_map_is_rdonly(map) &&
> map->ops->map_direct_value_addr &&
> - map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
> + map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
This correctly prevents constant folding of percpu array values in
check_mem_access(). However, should the same exclusion also be
added to const_reg_xfer() in kernel/bpf/const_fold.c?
That function has a parallel constant folding path via
bpf_map_direct_read() that only excludes BPF_MAP_TYPE_INSN_ARRAY:
kernel/bpf/const_fold.c:const_reg_xfer() {
...
case BPF_LDX:
...
if (!bpf_map_is_rdonly(map) ||
!map->ops->map_direct_value_addr ||
map->map_type == BPF_MAP_TYPE_INSN_ARRAY ||
off < 0 || off + size > map->value_size ||
bpf_map_direct_read(map, off, size, &val, is_ldsx)) {
*dst = unknown;
break;
}
...
}
Since percpu_array_map_ops now has map_direct_value_addr set,
bpf_map_direct_read() would dereference the raw percpu base pointer
directly, rather than the actual per-cpu address. For a frozen
read-only percpu array, the constant-folded value could differ from
what the program reads at runtime after the BPF_MOV64_PERCPU_REG
fixup converts the address to the correct per-cpu copy.
In the v3 series, bpf_map_direct_read() itself had a guard
(map->map_type != BPF_MAP_TYPE_ARRAY), which protected all callers.
The v4 moved this to caller-side checks but appears to have missed
const_reg_xfer().
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/24401974087
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-14 14:10 ` bot+bpf-ci
@ 2026-04-14 14:19 ` Leon Hwang
2026-04-15 2:19 ` Alexei Starovoitov
0 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 14:19 UTC (permalink / raw)
To: bot+bpf-ci, bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot, martin.lau, clm, ihor.solodrai
On 2026/4/14 22:10, bot+bpf-ci@kernel.org wrote:
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 1757c5720503..8ec24d63a2c4 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -6362,7 +6362,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>> if (tnum_is_const(reg->var_off) &&
>> bpf_map_is_rdonly(map) &&
>> map->ops->map_direct_value_addr &&
>> - map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
>> + map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
>> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
>
> This correctly prevents constant folding of percpu array values in
> check_mem_access(). However, should the same exclusion also be
> added to const_reg_xfer() in kernel/bpf/const_fold.c?
>
> That function has a parallel constant folding path via
> bpf_map_direct_read() that only excludes BPF_MAP_TYPE_INSN_ARRAY:
>
> kernel/bpf/const_fold.c:const_reg_xfer() {
> ...
> case BPF_LDX:
> ...
> if (!bpf_map_is_rdonly(map) ||
> !map->ops->map_direct_value_addr ||
> map->map_type == BPF_MAP_TYPE_INSN_ARRAY ||
> off < 0 || off + size > map->value_size ||
> bpf_map_direct_read(map, off, size, &val, is_ldsx)) {
> *dst = unknown;
> break;
> }
> ...
> }
>
> Since percpu_array_map_ops now has map_direct_value_addr set,
> bpf_map_direct_read() would dereference the raw percpu base pointer
> directly, rather than the actual per-cpu address. For a frozen
> read-only percpu array, the constant-folded value could differ from
> what the program reads at runtime after the BPF_MOV64_PERCPU_REG
> fixup converts the address to the correct per-cpu copy.
>
> In the v3 series, bpf_map_direct_read() itself had a guard
> (map->map_type != BPF_MAP_TYPE_ARRAY), which protected all callers.
> The v4 moved this to caller-side checks but appears to have missed
> const_reg_xfer().
>
>
Correct.
Will add a guard in bpf_map_direct_read() in the next revision:
if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
return -EINVAL;
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-14 14:19 ` Leon Hwang
@ 2026-04-15 2:19 ` Alexei Starovoitov
2026-04-17 1:30 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: Alexei Starovoitov @ 2026-04-15 2:19 UTC (permalink / raw)
To: Leon Hwang
Cc: bot+bpf-ci, bpf, ast, andrii, daniel, yonghong.song, song,
eddyz87, qmo, dxu, kernel-patches-bot, martin.lau, clm,
ihor.solodrai
On Tue, Apr 14, 2026 at 10:19:22PM +0800, Leon Hwang wrote:
> On 2026/4/14 22:10, bot+bpf-ci@kernel.org wrote:
> >> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> >> index 1757c5720503..8ec24d63a2c4 100644
> >> --- a/kernel/bpf/verifier.c
> >> +++ b/kernel/bpf/verifier.c
> >> @@ -6362,7 +6362,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> >> if (tnum_is_const(reg->var_off) &&
> >> bpf_map_is_rdonly(map) &&
> >> map->ops->map_direct_value_addr &&
> >> - map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
> >> + map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
> >> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
> >
> > This correctly prevents constant folding of percpu array values in
> > check_mem_access(). However, should the same exclusion also be
> > added to const_reg_xfer() in kernel/bpf/const_fold.c?
> >
> > That function has a parallel constant folding path via
> > bpf_map_direct_read() that only excludes BPF_MAP_TYPE_INSN_ARRAY:
> >
> > kernel/bpf/const_fold.c:const_reg_xfer() {
> > ...
> > case BPF_LDX:
> > ...
> > if (!bpf_map_is_rdonly(map) ||
> > !map->ops->map_direct_value_addr ||
> > map->map_type == BPF_MAP_TYPE_INSN_ARRAY ||
> > off < 0 || off + size > map->value_size ||
> > bpf_map_direct_read(map, off, size, &val, is_ldsx)) {
> > *dst = unknown;
> > break;
> > }
> > ...
> > }
> >
> > Since percpu_array_map_ops now has map_direct_value_addr set,
> > bpf_map_direct_read() would dereference the raw percpu base pointer
> > directly, rather than the actual per-cpu address. For a frozen
> > read-only percpu array, the constant-folded value could differ from
> > what the program reads at runtime after the BPF_MOV64_PERCPU_REG
> > fixup converts the address to the correct per-cpu copy.
> >
> > In the v3 series, bpf_map_direct_read() itself had a guard
> > (map->map_type != BPF_MAP_TYPE_ARRAY), which protected all callers.
> > The v4 moved this to caller-side checks but appears to have missed
> > const_reg_xfer().
> >
> >
> Correct.
>
> Will add a guard in bpf_map_direct_read() in the next revision:
>
> if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
> return -EINVAL;
hold on.
map->ops->map_direct_value_addr &&
- map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
+ map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
map_direct_value_addr() is set, but then immediately disallowed ?
Where else it's used?
Even if value_addr is working, then map_direct_value_meta() looks broken.
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-15 2:19 ` Alexei Starovoitov
@ 2026-04-17 1:30 ` Leon Hwang
2026-04-17 15:48 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 1:30 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bot+bpf-ci, bpf, ast, andrii, daniel, yonghong.song, song,
eddyz87, qmo, dxu, kernel-patches-bot, martin.lau, clm,
ihor.solodrai
On 15/4/26 10:19, Alexei Starovoitov wrote:
> On Tue, Apr 14, 2026 at 10:19:22PM +0800, Leon Hwang wrote:
>> On 2026/4/14 22:10, bot+bpf-ci@kernel.org wrote:
[...]
>>> In the v3 series, bpf_map_direct_read() itself had a guard
>>> (map->map_type != BPF_MAP_TYPE_ARRAY), which protected all callers.
>>> The v4 moved this to caller-side checks but appears to have missed
>>> const_reg_xfer().
>>>
>>>
>> Correct.
>>
>> Will add a guard in bpf_map_direct_read() in the next revision:
>>
>> if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
>> return -EINVAL;
>
> hold on.
> map->ops->map_direct_value_addr &&
> - map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
> + map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
>
> map_direct_value_addr() is set, but then immediately disallowed ?
> Where else it's used?
>
> Even if value_addr is working, then map_direct_value_meta() looks broken.
>
Ah, let me dive deeper.
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-17 1:30 ` Leon Hwang
@ 2026-04-17 15:48 ` Leon Hwang
2026-04-17 17:03 ` Alexei Starovoitov
0 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 15:48 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bot+bpf-ci, bpf, ast, andrii, daniel, yonghong.song, song,
eddyz87, qmo, dxu, kernel-patches-bot, martin.lau, clm,
ihor.solodrai
On 2026/4/17 09:30, Leon Hwang wrote:
> On 15/4/26 10:19, Alexei Starovoitov wrote:
>> On Tue, Apr 14, 2026 at 10:19:22PM +0800, Leon Hwang wrote:
>>> On 2026/4/14 22:10, bot+bpf-ci@kernel.org wrote:
> [...]
>>>> In the v3 series, bpf_map_direct_read() itself had a guard
>>>> (map->map_type != BPF_MAP_TYPE_ARRAY), which protected all callers.
>>>> The v4 moved this to caller-side checks but appears to have missed
>>>> const_reg_xfer().
>>>>
>>>>
>>> Correct.
>>>
>>> Will add a guard in bpf_map_direct_read() in the next revision:
>>>
>>> if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
>>> return -EINVAL;
>>
>> hold on.
>> map->ops->map_direct_value_addr &&
>> - map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
>> + map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
>> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
>>
>> map_direct_value_addr() is set, but then immediately disallowed ?
>> Where else it's used?
>>
>> Even if value_addr is working, then map_direct_value_meta() looks broken.
>>
IIUC, map_direct_value_meta() is only used for dumping xlated insns. If
map_direct_value_addr() is not available, map_direct_value_meta() won't
be called.
>
> Ah, let me dive deeper.
>
As for the above changes, let me explain them using diff snippet.
@@ -5808,6 +5808,8 @@ int bpf_map_direct_read(struct bpf_map *map, int
off, int size, u64 *val,
u64 addr;
int err;
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+ return -EINVAL;
err = map->ops->map_direct_value_addr(map, &addr, off);
if (err)
return err;
It is to guard percpu_array map against const_reg_xfer(). Instead of
updating const_reg_xfer(), better to update bpf_map_direct_read(). WDYT?
@@ -6362,7 +6364,8 @@ static int check_mem_access(struct
bpf_verifier_env *env, int insn_idx, u32 regn
if (tnum_is_const(reg->var_off) &&
bpf_map_is_rdonly(map) &&
map->ops->map_direct_value_addr &&
- map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
+ map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
Since reading data directly from a global percpu data map differs from
reading ordinary global data, such reads must apply 'this_cpu_off' to
access the copy on the current CPU.
At load time, the verifier therefore cannot constant-fold these reads
the way it does for global data, but that must not cause a verifier
failure either.
I'll add a test against bpf_map_is_rdonly()+percpu_array.
int map_off = off + reg->var_off.value;
u64 val = 0;
err = bpf_map_direct_read(map, map_off, size,
&val, is_ldsx);
if (err)
return err;
regs[value_regno].type = SCALAR_VALUE;
__mark_reg_known(®s[value_regno], val);
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data
2026-04-17 15:48 ` Leon Hwang
@ 2026-04-17 17:03 ` Alexei Starovoitov
0 siblings, 0 replies; 26+ messages in thread
From: Alexei Starovoitov @ 2026-04-17 17:03 UTC (permalink / raw)
To: Leon Hwang
Cc: bot+bpf-ci, bpf, Alexei Starovoitov, Andrii Nakryiko,
Daniel Borkmann, Yonghong Song, Song Liu, Eduard, Quentin Monnet,
Daniel Xu, kernel-patches-bot, Martin KaFai Lau, Chris Mason,
Ihor Solodrai
On Fri, Apr 17, 2026 at 8:48 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> On 2026/4/17 09:30, Leon Hwang wrote:
> > On 15/4/26 10:19, Alexei Starovoitov wrote:
> >> On Tue, Apr 14, 2026 at 10:19:22PM +0800, Leon Hwang wrote:
> >>> On 2026/4/14 22:10, bot+bpf-ci@kernel.org wrote:
> > [...]
> >>>> In the v3 series, bpf_map_direct_read() itself had a guard
> >>>> (map->map_type != BPF_MAP_TYPE_ARRAY), which protected all callers.
> >>>> The v4 moved this to caller-side checks but appears to have missed
> >>>> const_reg_xfer().
> >>>>
> >>>>
> >>> Correct.
> >>>
> >>> Will add a guard in bpf_map_direct_read() in the next revision:
> >>>
> >>> if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
> >>> return -EINVAL;
> >>
> >> hold on.
> >> map->ops->map_direct_value_addr &&
> >> - map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
> >> + map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
> >> + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
> >>
> >> map_direct_value_addr() is set, but then immediately disallowed ?
> >> Where else it's used?
> >>
> >> Even if value_addr is working, then map_direct_value_meta() looks broken.
> >>
>
> IIUC, map_direct_value_meta() is only used for dumping xlated insns. If
> no available map_direct_value_addr(), map_direct_value_meta() won't be
> called.
yes, but then xlated insn are bogus, no?
> >
> > Ah, let me dive deeper.
> >
>
> As for the above changes, let me explain them using diff snippet.
>
> @@ -5808,6 +5808,8 @@ int bpf_map_direct_read(struct bpf_map *map, int
> off, int size, u64 *val,
> u64 addr;
> int err;
>
> + if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
> + return -EINVAL;
> err = map->ops->map_direct_value_addr(map, &addr, off);
> if (err)
> return err;
>
> It is to guard percpu_array map against const_reg_xfer(). Instead of
> updating const_reg_xfer(), better to update bpf_map_direct_read(). WDYT?
yeah and move map_type != BPF_MAP_TYPE_INSN_ARRAY check
into bpf_map_direct_read() as well.
To cleanup const_reg_xfer() a bit.
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH bpf-next v4 3/8] libbpf: Probe percpu data feature
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 1/8] bpf: Drop duplicate blank lines in verifier Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 2/8] bpf: Introduce global percpu data Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 4/8] libbpf: Add support for global percpu data Leon Hwang
` (4 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
libbpf needs a reliable way to distinguish kernels that can support
global percpu data from those that cannot.
Add a dedicated feature probe, so libbpf can make capability decisions
early and fail predictably when global percpu data is unavailable.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/lib/bpf/features.c | 35 +++++++++++++++++++++++++++++++++
tools/lib/bpf/libbpf_internal.h | 2 ++
2 files changed, 37 insertions(+)
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 4f19a0d79b0c..4ffdae42ffd7 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -615,6 +615,38 @@ static int probe_kern_btf_layout(int token_fd)
(char *)layout, token_fd));
}
+static int probe_kern_percpu_data(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "libbpf_percpu", sizeof(int), 8, 1,
+ &map_opts);
+ if (map < 0) {
+ pr_warn("Error in %s(): %s. Couldn't create simple percpu_array map.\n",
+ __func__, errstr(map));
+ return map;
+ }
+
+ insns[0].imm = map;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ close(map);
+ return probe_fd(ret);
+}
+
typedef int (*feature_probe_fn)(int /* token_fd */);
static struct kern_feature_cache feature_cache;
@@ -699,6 +731,9 @@ static struct kern_feature_desc {
[FEAT_BTF_LAYOUT] = {
"kernel supports BTF layout", probe_kern_btf_layout,
},
+ [FEAT_PERCPU_DATA] = {
+ "kernel supports percpu data", probe_kern_percpu_data,
+ },
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 3781c45b46d3..f62da241a53f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -398,6 +398,8 @@ enum kern_feature_id {
FEAT_UPROBE_SYSCALL,
/* Kernel supports BTF layout information */
FEAT_BTF_LAYOUT,
+ /* Kernel supports percpu data */
+ FEAT_PERCPU_DATA,
__FEAT_CNT,
};
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH bpf-next v4 4/8] libbpf: Add support for global percpu data
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
` (2 preceding siblings ...)
2026-04-14 13:24 ` [PATCH bpf-next v4 3/8] libbpf: Probe percpu data feature Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag Leon Hwang
` (3 subsequent siblings)
7 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
Add support for global percpu data in libbpf by adding a new ".percpu"
section, similar to ".data". It enables efficient handling of percpu
global variables in bpf programs.
When generating loader for lightweight skeleton, update the percpu_array
map used for global percpu data using BPF_F_ALL_CPUS, in order to update
values across all CPUs using one value slot.
Unlike global data, the mmaped data for global percpu data will be marked
as read-only after populating the percpu_array map. Thereafter, users can
read those initialized percpu data after loading prog. If they want to
update the percpu data after loading prog, they have to update the
percpu_array map using key=0 instead.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/lib/bpf/bpf_gen_internal.h | 3 +-
tools/lib/bpf/gen_loader.c | 3 +-
tools/lib/bpf/libbpf.c | 67 ++++++++++++++++++++++++++------
3 files changed, 59 insertions(+), 14 deletions(-)
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 49af4260b8e6..5ea8383805d3 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -66,7 +66,8 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
enum bpf_prog_type prog_type, const char *prog_name,
const char *license, struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *load_attr, int prog_idx);
-void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size,
+ __u64 flags);
void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index cd5c2543f54d..3374f2e01ef2 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -1158,7 +1158,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
}
void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
- __u32 value_size)
+ __u32 value_size, __u64 flags)
{
int attr_size = offsetofend(union bpf_attr, flags);
int map_update_attr, value, key;
@@ -1166,6 +1166,7 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
int zero = 0;
memset(&attr, 0, attr_size);
+ attr.flags = flags;
value = add_data(gen, pvalue, value_size);
key = add_data(gen, &zero, sizeof(zero));
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8b0c3246097f..576b71a28058 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -536,6 +536,7 @@ struct bpf_struct_ops {
};
#define DATA_SEC ".data"
+#define PERCPU_SEC ".percpu"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
@@ -550,6 +551,7 @@ enum libbpf_map_type {
LIBBPF_MAP_BSS,
LIBBPF_MAP_RODATA,
LIBBPF_MAP_KCONFIG,
+ LIBBPF_MAP_PERCPU,
};
struct bpf_map_def {
@@ -661,6 +663,7 @@ enum sec_type {
SEC_DATA,
SEC_RODATA,
SEC_ST_OPS,
+ SEC_PERCPU,
};
struct elf_sec_desc {
@@ -1834,6 +1837,8 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map)
switch (map->def.type) {
case BPF_MAP_TYPE_ARRAY:
return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ return map->def.value_size;
case BPF_MAP_TYPE_ARENA:
return page_sz * map->def.max_entries;
default:
@@ -1933,7 +1938,7 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
struct btf_var_secinfo *vsi;
int i, n;
- if (!map->btf_value_type_id)
+ if (!map->btf_value_type_id || map->libbpf_type == LIBBPF_MAP_PERCPU)
return false;
t = btf__type_by_id(obj->btf, map->btf_value_type_id);
@@ -1957,6 +1962,7 @@ static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
const char *real_name, int sec_idx, void *data, size_t data_sz)
{
+ bool is_percpu = type == LIBBPF_MAP_PERCPU;
struct bpf_map_def *def;
struct bpf_map *map;
size_t mmap_sz;
@@ -1978,7 +1984,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
}
def = &map->def;
- def->type = BPF_MAP_TYPE_ARRAY;
+ def->type = is_percpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY;
def->key_size = sizeof(int);
def->value_size = data_sz;
def->max_entries = 1;
@@ -1991,8 +1997,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
if (map_is_mmapable(obj, map))
def->map_flags |= BPF_F_MMAPABLE;
- pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
- map->name, map->sec_idx, map->sec_offset, def->map_flags);
+ pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
+ map->name, is_percpu ? "percpu " : "", map->sec_idx,
+ map->sec_offset, def->map_flags);
mmap_sz = bpf_map_mmap_sz(map);
map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
@@ -2052,6 +2059,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
NULL,
sec_desc->data->d_size);
break;
+ case SEC_PERCPU:
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU,
+ sec_name, sec_idx,
+ sec_desc->data->d_buf,
+ sec_desc->data->d_size);
+ break;
default:
/* skip */
break;
@@ -4011,6 +4025,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RODATA;
sec_desc->shdr = sh;
sec_desc->data = data;
+ } else if (strcmp(name, PERCPU_SEC) == 0 ||
+ str_has_pfx(name, PERCPU_SEC ".")) {
+ sec_desc->sec_type = SEC_PERCPU;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
@@ -4539,6 +4558,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
case SEC_BSS:
case SEC_DATA:
case SEC_RODATA:
+ case SEC_PERCPU:
return true;
default:
return false;
@@ -4564,6 +4584,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
return LIBBPF_MAP_DATA;
case SEC_RODATA:
return LIBBPF_MAP_RODATA;
+ case SEC_PERCPU:
+ return LIBBPF_MAP_PERCPU;
default:
return LIBBPF_MAP_UNSPEC;
}
@@ -4939,7 +4961,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
/*
* LLVM annotates global data differently in BTF, that is,
- * only as '.data', '.bss' or '.rodata'.
+ * only as '.data', '.bss', '.percpu' or '.rodata'.
*/
if (!bpf_map__is_internal(map))
return -ENOENT;
@@ -5292,18 +5314,30 @@ static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
enum libbpf_map_type map_type = map->libbpf_type;
+ bool is_percpu = map_type == LIBBPF_MAP_PERCPU;
+ __u64 update_flags = 0;
int err, zero = 0;
size_t mmap_sz;
+ if (is_percpu) {
+ if (!obj->gen_loader && !kernel_supports(obj, FEAT_PERCPU_DATA)) {
+ pr_warn("map '%s': kernel does not support percpu data.\n",
+ bpf_map__name(map));
+ return -EOPNOTSUPP;
+ }
+
+ update_flags = BPF_F_ALL_CPUS;
+ }
+
if (obj->gen_loader) {
bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
- map->mmaped, map->def.value_size);
+ map->mmaped, map->def.value_size, update_flags);
if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
return 0;
}
- err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+ err = bpf_map_update_elem(map->fd, &zero, map->mmaped, update_flags);
if (err) {
err = -errno;
pr_warn("map '%s': failed to set initial contents: %s\n",
@@ -5348,6 +5382,13 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
return err;
}
map->mmaped = mmaped;
+ } else if (is_percpu) {
+ if (mprotect(map->mmaped, mmap_sz, PROT_READ)) {
+ err = -errno;
+ pr_warn("map '%s': failed to mprotect() contents: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
} else if (map->mmaped) {
munmap(map->mmaped, mmap_sz);
map->mmaped = NULL;
@@ -10705,16 +10746,18 @@ int bpf_map__fd(const struct bpf_map *map)
static bool map_uses_real_name(const struct bpf_map *map)
{
- /* Since libbpf started to support custom .data.* and .rodata.* maps,
- * their user-visible name differs from kernel-visible name. Users see
- * such map's corresponding ELF section name as a map name.
- * This check distinguishes .data/.rodata from .data.* and .rodata.*
- * maps to know which name has to be returned to the user.
+ /* Since libbpf started to support custom .data.*, .rodata.* and
+ * .percpu.* maps, their user-visible name differs from
+ * kernel-visible name. Users see such map's corresponding ELF section
+ * name as a map name. This check distinguishes plain .data/.rodata/.percpu
+ * from .data.*, .rodata.* and .percpu.* to choose which name to return.
*/
if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
return true;
if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
return true;
+ if (map->libbpf_type == LIBBPF_MAP_PERCPU && strcmp(map->real_name, PERCPU_SEC) != 0)
+ return true;
return false;
}
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
` (3 preceding siblings ...)
2026-04-14 13:24 ` [PATCH bpf-next v4 4/8] libbpf: Add support for global percpu data Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 21:02 ` sashiko-bot
2026-04-15 2:21 ` Alexei Starovoitov
2026-04-14 13:24 ` [PATCH bpf-next v4 6/8] bpftool: Generate skeleton for global percpu data Leon Hwang
` (2 subsequent siblings)
7 siblings, 2 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
When updating per-CPU maps via the lightweight skeleton loader, use
a single value slot across all CPUs. This avoids two potential issues
when updating on an M-CPU kernel with N cached slots (N < M), especially
when N is much smaller than M:
1) The update may trigger a page fault when copying data from the last
slot, as the read may go beyond the allocated buffer.
2) The update may copy unexpected data from slots [N, M-1].
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
kernel/bpf/syscall.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b73b25c63073..f0f3785ef57d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1785,6 +1785,21 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
+ /*
+ * When updating per-CPU maps via the lightweight skeleton
+ * loader, use a single value slot across all CPUs. This avoids
+ * two potential issues when updating on an M-CPU kernel with
+ * N cached slots (N < M), especially when N is much smaller
+ * than M:
+ * 1) The update may trigger a page fault when copying data from
+ * the last slot, as the read may go beyond the allocated
+ * buffer.
+ * 2) The update may copy unexpected data from slots [N, M-1].
+ */
+ if (bpfptr_is_kernel(uattr) && bpf_map_supports_cpu_flags(map->map_type) &&
+ !(attr->flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
+ attr->flags |= BPF_F_ALL_CPUS;
+
err = bpf_map_check_op_flags(map, attr->flags, ~0);
if (err)
goto err_put;
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
2026-04-14 13:24 ` [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag Leon Hwang
@ 2026-04-14 21:02 ` sashiko-bot
2026-04-17 1:54 ` Leon Hwang
2026-04-15 2:21 ` Alexei Starovoitov
1 sibling, 1 reply; 26+ messages in thread
From: sashiko-bot @ 2026-04-14 21:02 UTC (permalink / raw)
To: Leon Hwang; +Cc: bpf
Sashiko AI review found 1 potential issue(s):
- [High] The patch unconditionally forces the BPF_F_ALL_CPUS flag for any kernel-originated per-CPU map update that omits CPU flags, breaking the bpf_sys_bpf() helper API for BPF_PROG_TYPE_SYSCALL programs.
--
commit 357a4fab1d0fc8586bb52bd3aab10e26fd39a2a6
Author: Leon Hwang <leon.hwang@linux.dev>
bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
This commit modifies map_update_elem() to automatically apply the
BPF_F_ALL_CPUS flag for kernel-originated updates to per-CPU maps when no
CPU flags are specified. This is intended to fix potential out-of-bounds
reads when updating maps via the lightweight skeleton loader on systems where
the number of cached slots is less than the number of CPUs.
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index b73b25c630734..f0f3785ef57d0 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1785,6 +1785,21 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
> goto err_put;
> }
>
> + /*
> + * When updating per-CPU maps via the lightweight skeleton
> + * loader, use a single value slot across all CPUs. This avoids
> + * two potential issues when updating on an M-CPU kernel with
> + * N cached slots (N < M), especially when N is much smaller
> + * than M:
> + * 1) The update may trigger a page fault when copying data from
> + * the last slot, as the read may go beyond the allocated
> + * buffer.
> + * 2) The update may copy unexpected data from slots [N, M-1].
> + */
> + if (bpfptr_is_kernel(uattr) && bpf_map_supports_cpu_flags(map->map_type) &&
> + !(attr->flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
> + attr->flags |= BPF_F_ALL_CPUS;
Does this unintentionally break the bpf_sys_bpf() helper API for
BPF_PROG_TYPE_SYSCALL programs?
Because bpf_sys_bpf() also wraps the pointer with KERNEL_BPFPTR,
bpfptr_is_kernel(uattr) will evaluate to true for any eBPF program calling it.
Prior to this patch, a BPF_PROG_TYPE_SYSCALL program could legitimately
update a per-CPU map with distinct values for each CPU by providing an array
of num_possible_cpus() elements and omitting any CPU flags, which mirrors
standard user-space API semantics.
With this change, it appears the kernel silently overrides attr->flags by
setting BPF_F_ALL_CPUS. This truncates the internal value_size calculation
down to a single element (map->value_size), reads only the first element
of the BPF program's provided buffer, and incorrectly replicates it across
all CPUs.
Should this workaround for the skeleton loader be constrained to
bpftool gen skeleton instead of mutating the core syscall handler?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260414132421.63409-1-leon.hwang@linux.dev?part=5
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
2026-04-14 21:02 ` sashiko-bot
@ 2026-04-17 1:54 ` Leon Hwang
0 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 1:54 UTC (permalink / raw)
To: sashiko; +Cc: bpf
On 15/4/26 05:02, sashiko-bot@kernel.org wrote:
> Sashiko AI review found 1 potential issue(s):
[...]
>> + /*
>> + * When updating per-CPU maps via the lightweight skeleton
>> + * loader, use a single value slot across all CPUs. This avoids
>> + * two potential issues when updating on an M-CPU kernel with
>> + * N cached slots (N < M), especially when N is much smaller
>> + * than M:
>> + * 1) The update may trigger a page fault when copying data from
>> + * the last slot, as the read may go beyond the allocated
>> + * buffer.
>> + * 2) The update may copy unexpected data from slots [N, M-1].
>> + */
>> + if (bpfptr_is_kernel(uattr) && bpf_map_supports_cpu_flags(map->map_type) &&
>> + !(attr->flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
>> + attr->flags |= BPF_F_ALL_CPUS;
>
> Does this unintentionally break the bpf_sys_bpf() helper API for
> BPF_PROG_TYPE_SYSCALL programs?
>
> Because bpf_sys_bpf() also wraps the pointer with KERNEL_BPFPTR,
> bpfptr_is_kernel(uattr) will evaluate to true for any eBPF program calling it.
>
> Prior to this patch, a BPF_PROG_TYPE_SYSCALL program could legitimately
> update a per-CPU map with distinct values for each CPU by providing an array
> of num_possible_cpus() elements and omitting any CPU flags, which mirrors
> standard user-space API semantics.
>
> With this change, it appears the kernel silently overrides attr->flags by
> setting BPF_F_ALL_CPUS. This truncates the internal value_size calculation
> down to a single element (map->value_size), reads only the first element
> of the BPF program's provided buffer, and incorrectly replicates it across
> all CPUs.
>
> Should this workaround for the skeleton loader be constrained to
> bpftool gen skeleton instead of mutating the core syscall handler?
>
It was intentional. Even though BPF_PROG_TYPE_SYSCALL progs could be
broken, that should be fixed separately once the OOB issue is verified
to be real for SYSCALL progs.
This patch will be dropped from this series. And, a separate patch will
be posted if the oob issue is true.
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
2026-04-14 13:24 ` [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag Leon Hwang
2026-04-14 21:02 ` sashiko-bot
@ 2026-04-15 2:21 ` Alexei Starovoitov
2026-04-17 1:33 ` Leon Hwang
1 sibling, 1 reply; 26+ messages in thread
From: Alexei Starovoitov @ 2026-04-15 2:21 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On Tue, Apr 14, 2026 at 09:24:17PM +0800, Leon Hwang wrote:
> When updating per-CPU maps via the lightweight skeleton loader, use
> a single value slot across all CPUs. This avoids two potential issues
> when updating on an M-CPU kernel with N cached slots (N < M), especially
> when N is much smaller than M:
>
> 1) The update may trigger a page fault when copying data from the last
> slot, as the read may go beyond the allocated buffer.
> 2) The update may copy unexpected data from slots [N, M-1].
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> kernel/bpf/syscall.c | 15 +++++++++++++++
> 1 file changed, 15 insertions(+)
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index b73b25c63073..f0f3785ef57d 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1785,6 +1785,21 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
> goto err_put;
> }
>
> + /*
> + * When updating per-CPU maps via the lightweight skeleton
> + * loader, use a single value slot across all CPUs. This avoids
> + * two potential issues when updating on an M-CPU kernel with
> + * N cached slots (N < M), especially when N is much smaller
> + * than M:
> + * 1) The update may trigger a page fault when copying data from
> + * the last slot, as the read may go beyond the allocated
> + * buffer.
> + * 2) The update may copy unexpected data from slots [N, M-1].
> + */
> + if (bpfptr_is_kernel(uattr) && bpf_map_supports_cpu_flags(map->map_type) &&
> + !(attr->flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
> + attr->flags |= BPF_F_ALL_CPUS;
This looks like a hack. It's not addressing the actual bug.
If there is a bug submit it separately with fixes tag.
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
2026-04-15 2:21 ` Alexei Starovoitov
@ 2026-04-17 1:33 ` Leon Hwang
2026-04-17 16:07 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 1:33 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 15/4/26 10:21, Alexei Starovoitov wrote:
> On Tue, Apr 14, 2026 at 09:24:17PM +0800, Leon Hwang wrote:
>> When updating per-CPU maps via the lightweight skeleton loader, use
>> a single value slot across all CPUs. This avoids two potential issues
>> when updating on an M-CPU kernel with N cached slots (N < M), especially
>> when N is much smaller than M:
>>
>> 1) The update may trigger a page fault when copying data from the last
>> slot, as the read may go beyond the allocated buffer.
>> 2) The update may copy unexpected data from slots [N, M-1].
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>> kernel/bpf/syscall.c | 15 +++++++++++++++
>> 1 file changed, 15 insertions(+)
>>
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index b73b25c63073..f0f3785ef57d 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -1785,6 +1785,21 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
>> goto err_put;
>> }
>>
>> + /*
>> + * When updating per-CPU maps via the lightweight skeleton
>> + * loader, use a single value slot across all CPUs. This avoids
>> + * two potential issues when updating on an M-CPU kernel with
>> + * N cached slots (N < M), especially when N is much smaller
>> + * than M:
>> + * 1) The update may trigger a page fault when copying data from
>> + * the last slot, as the read may go beyond the allocated
>> + * buffer.
>> + * 2) The update may copy unexpected data from slots [N, M-1].
>> + */
>> + if (bpfptr_is_kernel(uattr) && bpf_map_supports_cpu_flags(map->map_type) &&
>> + !(attr->flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
>> + attr->flags |= BPF_F_ALL_CPUS;
>
> This looks like a hack. It's not addressing the actual bug.
> If there is a bug submit it separately with fixes tag.
Sure, will verify whether it is a bug. If it is, will fix it with
separate patch.
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag
2026-04-17 1:33 ` Leon Hwang
@ 2026-04-17 16:07 ` Leon Hwang
0 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 16:07 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
kernel-patches-bot
On 2026/4/17 09:33, Leon Hwang wrote:
> On 15/4/26 10:21, Alexei Starovoitov wrote:
>> On Tue, Apr 14, 2026 at 09:24:17PM +0800, Leon Hwang wrote:
>>> When updating per-CPU maps via the lightweight skeleton loader, use
>>> a single value slot across all CPUs. This avoids two potential issues
>>> when updating on an M-CPU kernel with N cached slots (N < M), especially
>>> when N is much smaller than M:
>>>
>>> 1) The update may trigger a page fault when copying data from the last
>>> slot, as the read may go beyond the allocated buffer.
>>> 2) The update may copy unexpected data from slots [N, M-1].
>>>
>>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>>> ---
>>> kernel/bpf/syscall.c | 15 +++++++++++++++
>>> 1 file changed, 15 insertions(+)
>>>
>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>> index b73b25c63073..f0f3785ef57d 100644
>>> --- a/kernel/bpf/syscall.c
>>> +++ b/kernel/bpf/syscall.c
>>> @@ -1785,6 +1785,21 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
>>> goto err_put;
>>> }
>>>
>>> + /*
>>> + * When updating per-CPU maps via the lightweight skeleton
>>> + * loader, use a single value slot across all CPUs. This avoids
>>> + * two potential issues when updating on an M-CPU kernel with
>>> + * N cached slots (N < M), especially when N is much smaller
>>> + * than M:
>>> + * 1) The update may trigger a page fault when copying data from
>>> + * the last slot, as the read may go beyond the allocated
>>> + * buffer.
>>> + * 2) The update may copy unexpected data from slots [N, M-1].
>>> + */
>>> + if (bpfptr_is_kernel(uattr) && bpf_map_supports_cpu_flags(map->map_type) &&
>>> + !(attr->flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
>>> + attr->flags |= BPF_F_ALL_CPUS;
>>
>> This looks like a hack. It's not addressing the actual bug.
>> If there is a bug submit it separately with fixes tag.
>
> Sure, will verify whether it is a bug. If it is, will fix it with
> separate patch.
>
After implementing two selftests [1] against syscall progs and the
lightweight skeleton, I confirmed it is not a real issue.
The assuming oob reading might occur when updating percpu_array map
using a small value buffer for syscall progs. However, the oob reading
won't make kernel panic, because copy kernel memory using
copy_from_kernel_nofault(). However, as for lskel, the oob reading
doesn't occur, even set value size as 32000 and update percpu_array map
using an int.
The assumed issue #2, copying unexpected data from slots [N, M-1], is
true for both syscall progs and lskel. However, I think it is the
users' responsibility to update the percpu_array map correctly with a
big enough value buffer.
In conclusion, I'll drop this patch from this series, and won't send
separate patch because it is not a bug.
[1] https://github.com/Asphaltt/bpf/commits/bpf/lskel-oob/v1/
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH bpf-next v4 6/8] bpftool: Generate skeleton for global percpu data
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
` (4 preceding siblings ...)
2026-04-14 13:24 ` [PATCH bpf-next v4 5/8] bpf: Update per-CPU maps using BPF_F_ALL_CPUS flag Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 21:26 ` sashiko-bot
2026-04-14 13:24 ` [PATCH bpf-next v4 7/8] selftests/bpf: Add tests to verify " Leon Hwang
2026-04-14 13:24 ` [PATCH bpf-next v4 8/8] selftests/bpf: Add a test to verify bpf_iter for " Leon Hwang
7 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
Enhance bpftool to generate skeletons that properly handle global percpu
variables. The generated skeleton now includes a dedicated structure for
percpu data, allowing users to initialize and access percpu variables more
efficiently.
For global percpu variables, the skeleton now includes a nested
structure, e.g.:
struct test_global_percpu_data {
struct bpf_object_skeleton *skeleton;
struct bpf_object *obj;
struct {
struct bpf_map *percpu;
} maps;
// ...
struct test_global_percpu_data__percpu {
int data;
char run;
struct {
char set;
int i;
int nums[7];
} struct_data;
int nums[7];
} *percpu;
// ...
};
* The "struct test_global_percpu_data__percpu *percpu" points to
initialized data, which is actually "maps.percpu->mmaped".
* Before loading the skeleton, updating the
"struct test_global_percpu_data__percpu *percpu" modifies the initial
value of the corresponding global percpu variables.
* After loading the skeleton, "maps.percpu->mmaped" has been marked as
read-only. If users want to update the global percpu variables, they
have to update the "maps.percpu" map instead.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/bpf/bpftool/gen.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 2f9e10752e28..ec63eb871443 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
{
- static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+ static const char *sfxs[] = { ".data", ".rodata", ".bss", ".percpu", ".kconfig" };
const char *name = bpf_map__name(map);
int i, n;
@@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
{
- static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+ static const char *pfxs[] = { ".data", ".rodata", ".bss", ".percpu", ".kconfig" };
int i, n;
/* recognize hard coded LLVM section name */
@@ -254,6 +254,11 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map
return NULL;
}
+static bool bpf_map_is_percpu_data(const struct bpf_map *map)
+{
+ return bpf_map__is_internal(map) && bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY;
+}
+
static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
{
size_t tmp_sz;
@@ -263,13 +268,12 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
return true;
}
- if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
- return false;
-
- if (!get_map_ident(map, buf, sz))
- return false;
+ if (bpf_map__is_internal(map) &&
+ ((bpf_map__map_flags(map) & BPF_F_MMAPABLE) || bpf_map_is_percpu_data(map)) &&
+ get_map_ident(map, buf, sz))
+ return true;
- return true;
+ return false;
}
static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
@@ -669,7 +673,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
if (!get_map_ident(map, ident, sizeof(ident)))
continue;
if (bpf_map__is_internal(map) &&
- (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
+ ((bpf_map__map_flags(map) & BPF_F_MMAPABLE) || bpf_map_is_percpu_data(map)))
printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zu);\n",
ident, bpf_map_mmap_sz(map));
codegen("\
@@ -847,7 +851,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
bpf_object__for_each_map(map, obj) {
const char *mmap_flags;
- if (!is_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)) || bpf_map_is_percpu_data(map))
continue;
if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 6/8] bpftool: Generate skeleton for global percpu data
2026-04-14 13:24 ` [PATCH bpf-next v4 6/8] bpftool: Generate skeleton for global percpu data Leon Hwang
@ 2026-04-14 21:26 ` sashiko-bot
2026-04-17 2:01 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: sashiko-bot @ 2026-04-14 21:26 UTC (permalink / raw)
To: Leon Hwang; +Cc: bpf
Sashiko AI review found 1 potential issue(s):
- [High] Missing read-only memory protection for `.percpu` initial value buffer in generated light skeletons.
--
commit e61b5ee6e5a7cee758b644de0f6b4aaea40f020c
Author: Leon Hwang <leon.hwang@linux.dev>
bpftool: Generate skeleton for global percpu data
Enhance bpftool to generate skeletons that properly handle global percpu
variables. The skeleton includes a nested structure for percpu data, which
is marked as read-only after loading so users must update the map directly.
> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> --- a/tools/bpf/bpftool/gen.c
> +++ b/tools/bpf/bpftool/gen.c
[ ... ]
> @@ -847,7 +851,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
> bpf_object__for_each_map(map, obj) {
> const char *mmap_flags;
>
> - if (!is_mmapable_map(map, ident, sizeof(ident)))
> + if (!is_mmapable_map(map, ident, sizeof(ident)) || bpf_map_is_percpu_data(map))
> continue;
>
> if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
The commit message notes that after loading the skeleton, maps.percpu->mmaped
has been marked as read-only.
By skipping the .percpu map entirely here during light skeleton generation
in gen_trace(), does this leave the user-space buffer for .percpu writable?
If a user modifies skel->percpu after the light skeleton is loaded, will it
silently succeed without actually updating the kernel per-cpu map? Does the
generated light skeleton need an explicit mprotect(..., PROT_READ) for the
.percpu initial value buffer to enforce the read-only constraint?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260414132421.63409-1-leon.hwang@linux.dev?part=6
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 6/8] bpftool: Generate skeleton for global percpu data
2026-04-14 21:26 ` sashiko-bot
@ 2026-04-17 2:01 ` Leon Hwang
0 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 2:01 UTC (permalink / raw)
To: sashiko; +Cc: bpf
On 15/4/26 05:26, sashiko-bot@kernel.org wrote:
> Sashiko AI review found 1 potential issue(s):
> - [High] Missing read-only memory protection for `.percpu` initial value buffer in generated light skeletons.
> --
>
> commit e61b5ee6e5a7cee758b644de0f6b4aaea40f020c
> Author: Leon Hwang <leon.hwang@linux.dev>
>
> bpftool: Generate skeleton for global percpu data
>
> Enhance bpftool to generate skeletons that properly handle global percpu
> variables. The skeleton includes a nested structure for percpu data, which
> is marked as read-only after loading so users must update the map directly.
>
>> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
>> --- a/tools/bpf/bpftool/gen.c
>> +++ b/tools/bpf/bpftool/gen.c
>
> [ ... ]
>
>> @@ -847,7 +851,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
>> bpf_object__for_each_map(map, obj) {
>> const char *mmap_flags;
>>
>> - if (!is_mmapable_map(map, ident, sizeof(ident)))
>> + if (!is_mmapable_map(map, ident, sizeof(ident)) || bpf_map_is_percpu_data(map))
>> continue;
>>
>> if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
>
> The commit message notes that after loading the skeleton, maps.percpu->mmaped
> has been marked as read-only.
>
> By skipping the .percpu map entirely here during light skeleton generation
> in gen_trace(), does this leave the user-space buffer for .percpu writable?
>
> If a user modifies skel->percpu after the light skeleton is loaded, will it
> silently succeed without actually updating the kernel per-cpu map? Does the
> generated light skeleton need an explicit mprotect(..., PROT_READ) for the
> .percpu initial value buffer to enforce the read-only constraint?
>
The mprotect(..., PROT_READ) was added in patch #4.
I think it'd better to move the change to this patch. Then, readers
won't miss the mprotect().
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH bpf-next v4 7/8] selftests/bpf: Add tests to verify global percpu data
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
` (5 preceding siblings ...)
2026-04-14 13:24 ` [PATCH bpf-next v4 6/8] bpftool: Generate skeleton for global percpu data Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 21:45 ` sashiko-bot
2026-04-14 13:24 ` [PATCH bpf-next v4 8/8] selftests/bpf: Add a test to verify bpf_iter for " Leon Hwang
7 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
If the arch, like s390x, does not support percpu insns, these cases won't
test global percpu data; instead, they check for -EOPNOTSUPP after loading
the prog.
The following APIs have been tested for global percpu data:
1. bpf_map__set_initial_value()
2. bpf_map__initial_value()
3. generated percpu struct pointer pointing to internal map's mmaped data
4. bpf_map__lookup_elem() for global percpu data map
At the same time, the case is also tested with 'bpftool gen skeleton -L'.
The verifier log is also tested.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/testing/selftests/bpf/Makefile | 2 +-
.../bpf/prog_tests/global_data_init.c | 223 ++++++++++++++++++
.../bpf/progs/test_global_percpu_data.c | 66 ++++++
3 files changed, 290 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f75c4f52c028..39ae3583983b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -504,7 +504,7 @@ LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
- kfunc_call_test_subprog.c
+ kfunc_call_test_subprog.c test_global_percpu_data.c
SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 8466332d7406..0b0384b6515e 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+#include "test_global_percpu_data.skel.h"
+#include "test_global_percpu_data.lskel.h"
void test_global_data_init(void)
{
@@ -60,3 +63,223 @@ void test_global_data_init(void)
free(newval);
bpf_object__close(obj);
}
+
+static void test_global_percpu_data_init(void)
+{
+ struct test_global_percpu_data__percpu *percpu_data = NULL;
+ struct test_global_percpu_data__percpu *init_data, *data;
+ struct test_global_percpu_data__percpu init_value = {};
+ int key, prog_fd, err, num_cpus, num_online, i;
+ struct test_global_percpu_data *skel = NULL;
+ __u64 args[2] = {0x1234ULL, 0x5678ULL};
+ size_t elem_sz, init_data_sz;
+ struct bpf_map *map;
+ bool *online;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
+
+ num_cpus = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+ &online, &num_online);
+ if (!ASSERT_OK(err, "parse_cpu_mask_file"))
+ return;
+
+ elem_sz = roundup(sizeof(*percpu_data), 8);
+ percpu_data = calloc(num_cpus, elem_sz);
+ if (!ASSERT_OK_PTR(percpu_data, "calloc percpu_data"))
+ goto out;
+
+ skel = test_global_percpu_data__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
+ goto out;
+ if (!ASSERT_OK_PTR(skel->percpu, "skel->percpu"))
+ goto out;
+
+ ASSERT_EQ(skel->percpu->data, -1, "skel->percpu->data");
+ ASSERT_FALSE(skel->percpu->run, "skel->percpu->run");
+ ASSERT_EQ(skel->percpu->nums[6], 0, "skel->percpu->nums[6]");
+ ASSERT_EQ(skel->percpu->struct_data.i, -1, "struct_data.i");
+ ASSERT_FALSE(skel->percpu->struct_data.set, "struct_data.set");
+ ASSERT_EQ(skel->percpu->struct_data.nums[6], 0, "struct_data.nums[6]");
+
+ map = skel->maps.percpu;
+ if (!ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY, "bpf_map__type"))
+ goto out;
+
+ init_value.data = 2;
+ init_value.nums[6] = -1;
+ init_value.struct_data.i = 2;
+ init_value.struct_data.nums[6] = -1;
+ err = bpf_map__set_initial_value(map, &init_value, sizeof(init_value));
+ if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
+ goto out;
+
+ init_data = bpf_map__initial_value(map, &init_data_sz);
+ if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
+ goto out;
+
+ ASSERT_EQ(init_data->data, init_value.data, "init_value data");
+ ASSERT_EQ(init_data->run, init_value.run, "init_value run");
+ ASSERT_EQ(init_data->struct_data.i, init_value.struct_data.i,
+ "init_value struct_data.i");
+ ASSERT_EQ(init_data->struct_data.nums[6],
+ init_value.struct_data.nums[6],
+ "init_value struct_data.nums[6]");
+ ASSERT_EQ(init_data_sz, sizeof(init_value), "init_value size");
+ ASSERT_EQ((void *) init_data, (void *) skel->percpu,
+ "skel->percpu eq init_data");
+ ASSERT_EQ(skel->percpu->data, init_value.data,
+ "skel->percpu->data");
+ ASSERT_EQ(skel->percpu->run, init_value.run,
+ "skel->percpu->run");
+ ASSERT_EQ(skel->percpu->struct_data.i, init_value.struct_data.i,
+ "skel->percpu->struct_data.i");
+ ASSERT_EQ(skel->percpu->struct_data.nums[6],
+ init_value.struct_data.nums[6],
+ "skel->percpu->struct_data.nums[6]");
+
+ err = test_global_percpu_data__load(skel);
+ if (!ASSERT_OK(err, "test_global_percpu_data__load"))
+ goto out;
+
+ ASSERT_OK_PTR(skel->percpu, "skel->percpu");
+
+ prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
+
+ /* run on every CPU */
+ for (i = 0; i < num_online; i++) {
+ if (!online[i])
+ continue;
+
+ topts.cpu = i;
+ topts.retval = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, 0, "bpf_prog_test_run_opts retval");
+ }
+
+ key = 0;
+ err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
+ elem_sz * num_cpus, 0);
+ if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
+ goto out;
+
+ for (i = 0; i < num_online; i++) {
+ if (!online[i])
+ continue;
+
+ data = (void *)percpu_data + elem_sz * i;
+ ASSERT_EQ(data->data, 1, "percpu_data->data");
+ ASSERT_TRUE(data->run, "percpu_data->run");
+ ASSERT_EQ(data->nums[6], 0xc0de, "percpu_data->nums[6]");
+ ASSERT_EQ(data->struct_data.i, 1, "struct_data.i");
+ ASSERT_TRUE(data->struct_data.set, "struct_data.set");
+ ASSERT_EQ(data->struct_data.nums[6], 0xc0de, "struct_data.nums[6]");
+ }
+
+out:
+ test_global_percpu_data__destroy(skel);
+ free(percpu_data);
+ free(online);
+}
+
+static void test_global_percpu_data_lskel(void)
+{
+ struct test_global_percpu_data__percpu *data, *percpu_data = NULL;
+ int key, prog_fd, map_fd, err, num_cpus, num_online, i;
+ struct test_global_percpu_data_lskel *lskel = NULL;
+ __u64 args[2] = {0x1234ULL, 0x5678ULL};
+ size_t elem_sz;
+ bool *online;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
+
+ num_cpus = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+ &online, &num_online);
+ if (!ASSERT_OK(err, "parse_cpu_mask_file"))
+ return;
+
+ elem_sz = roundup(sizeof(*percpu_data), 8);
+ percpu_data = calloc(num_cpus, elem_sz);
+ if (!ASSERT_OK_PTR(percpu_data, "calloc percpu_data"))
+ goto out;
+
+ lskel = test_global_percpu_data_lskel__open();
+ if (!ASSERT_OK_PTR(lskel, "test_global_percpu_data_lskel__open"))
+ goto out;
+
+ err = test_global_percpu_data_lskel__load(lskel);
+ if (!ASSERT_OK(err, "test_global_percpu_data_lskel__load"))
+ goto out;
+
+ prog_fd = lskel->progs.update_percpu_data.prog_fd;
+
+ /* run on every CPU */
+ for (i = 0; i < num_online; i++) {
+ if (!online[i])
+ continue;
+
+ topts.cpu = i;
+ topts.retval = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, 0, "bpf_prog_test_run_opts retval");
+ }
+
+ key = 0;
+ map_fd = lskel->maps.percpu.map_fd;
+ err = bpf_map_lookup_elem(map_fd, &key, percpu_data);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto out;
+
+ for (i = 0; i < num_online; i++) {
+ if (!online[i])
+ continue;
+
+ data = (void *)percpu_data + elem_sz * i;
+ ASSERT_EQ(data->data, 1, "percpu_data->data");
+ ASSERT_TRUE(data->run, "percpu_data->run");
+ ASSERT_EQ(data->nums[6], 0xc0de, "percpu_data->nums[6]");
+ ASSERT_EQ(data->struct_data.i, 1, "struct_data.i");
+ ASSERT_TRUE(data->struct_data.set, "struct_data.set");
+ ASSERT_EQ(data->struct_data.nums[6], 0xc0de, "struct_data.nums[6]");
+ }
+
+out:
+ test_global_percpu_data_lskel__destroy(lskel);
+ free(percpu_data);
+ free(online);
+}
+
+static void test_global_percpu_data_verifier_failure(void)
+{
+ RUN_TESTS(test_global_percpu_data);
+}
+
+void test_global_percpu_data(void)
+{
+ if (!feat_supported(NULL, FEAT_PERCPU_DATA)) {
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("init"))
+ test_global_percpu_data_init();
+ if (test__start_subtest("lskel"))
+ test_global_percpu_data_lskel();
+ if (test__start_subtest("verifier_failure"))
+ test_global_percpu_data_verifier_failure();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
new file mode 100644
index 000000000000..947721c21f30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+int data SEC(".percpu") = -1;
+int nums[7] SEC(".percpu");
+char run SEC(".percpu") = 0;
+struct {
+ char set;
+ int i;
+ int nums[7];
+} struct_data SEC(".percpu") = {
+ .set = 0,
+ .i = -1,
+};
+
+SEC("raw_tp/task_rename")
+__auxiliary
+int update_percpu_data(void *ctx)
+{
+ struct_data.nums[6] = 0xc0de;
+ struct_data.set = 1;
+ struct_data.i = 1;
+ nums[6] = 0xc0de;
+ data = 1;
+ run = 1;
+ return 0;
+}
+
+static const char fmt[] SEC(".percpu.fmt") = "data %d\n";
+
+SEC("?kprobe")
+__failure __msg("R{{[0-9]+}} points to percpu_array map which cannot be used as const string")
+int verifier_strncmp(void *ctx)
+{
+ return bpf_strncmp("test", 5, fmt);
+}
+
+SEC("?kprobe")
+__failure __msg("R{{[0-9]+}} points to percpu_array map which cannot be used as const string")
+int verifier_snprintf(void *ctx)
+{
+ u64 args[] = { data };
+ char buf[128];
+ int len;
+
+ len = bpf_snprintf(buf, sizeof(buf), fmt, args, 1);
+ if (len > 0)
+ bpf_printk("snprintf: %s\n", buf);
+ return 0;
+}
+
+static volatile const char fmt2[] SEC(".percpu.fmt") = "data %d\n";
+
+SEC("?kprobe")
+__success
+__xlated("r{{[0-9]+}} = &(void __percpu *)(r{{[0-9]+}})")
+int verifier_percpu_read(void *ctx)
+{
+ char c = fmt2[0];
+
+ return c == 'd';
+}
+
+char _license[] SEC("license") = "GPL";
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 7/8] selftests/bpf: Add tests to verify global percpu data
2026-04-14 13:24 ` [PATCH bpf-next v4 7/8] selftests/bpf: Add tests to verify " Leon Hwang
@ 2026-04-14 21:45 ` sashiko-bot
2026-04-17 2:06 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: sashiko-bot @ 2026-04-14 21:45 UTC (permalink / raw)
To: Leon Hwang; +Cc: bpf
Sashiko AI review found 1 potential issue(s):
- [Medium] The tests use the raw CPU ID to index into the densely packed `percpu_data` array, which will cause an out-of-bounds read and test failures on systems with sparse CPU masks.
--
commit a7c2685b539ebc816cdb05decab1d7a8a4e1328e
Author: Leon Hwang <leon.hwang@linux.dev>
selftests/bpf: Add tests to verify global percpu data
This commit adds new selftests to verify the behavior of global percpu data
by checking map initial values, struct pointers, and looking up elements from
global percpu data maps.
> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
[ ... ]
> +static void test_global_percpu_data_init(void)
> +{
[ ... ]
> + err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
> + elem_sz * num_cpus, 0);
> + if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
> + goto out;
> +
> + for (i = 0; i < num_online; i++) {
> + if (!online[i])
> + continue;
> +
> + data = (void *)percpu_data + elem_sz * i;
Will this logic cause an out-of-bounds read on systems with sparse CPU masks?
When reading a per-CPU map using bpf_map__lookup_elem(), the kernel packs the
per-CPU values densely into the output buffer by iterating over the possible
CPU mask.
The test allocates num_cpus (the total count of possible CPUs) elements for
the output buffer, but then iterates over the raw CPU IDs using i and uses i
as an index into the densely packed array.
On systems with sparse CPU masks (for example, where CPUs 0 and 100 are
possible/online), num_cpus will be 2. When the loop reaches i = 100, it
appears it will attempt to access percpu_data + elem_sz * 100, reading past
the bounds of the allocated memory.
Could we maintain a separate index counter that increments only for possible
CPUs to match the kernel's dense packing behavior?
[ ... ]
> +static void test_global_percpu_data_lskel(void)
> +{
[ ... ]
> + err = bpf_map_lookup_elem(map_fd, &key, percpu_data);
> + if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
> + goto out;
> +
> + for (i = 0; i < num_online; i++) {
> + if (!online[i])
> + continue;
> +
> + data = (void *)percpu_data + elem_sz * i;
Does this loop have the same bounds checking issue as mentioned above?
If i is a raw CPU ID on a system with a sparse CPU mask, this calculation
could exceed the bounds of the percpu_data buffer.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260414132421.63409-1-leon.hwang@linux.dev?part=7
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 7/8] selftests/bpf: Add tests to verify global percpu data
2026-04-14 21:45 ` sashiko-bot
@ 2026-04-17 2:06 ` Leon Hwang
0 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 2:06 UTC (permalink / raw)
To: sashiko; +Cc: bpf
On 15/4/26 05:45, sashiko-bot@kernel.org wrote:
> Sashiko AI review found 1 potential issue(s):
> - [Medium] The tests use the raw CPU ID to index into the densely packed `percpu_data` array, which will cause an out-of-bounds read and test failures on systems with sparse CPU masks.
> --
>
> commit a7c2685b539ebc816cdb05decab1d7a8a4e1328e
> Author: Leon Hwang <leon.hwang@linux.dev>
>
> selftests/bpf: Add tests to verify global percpu data
>
> This commit adds new selftests to verify the behavior of global percpu data
> by checking map initial values, struct pointers, and looking up elements from
> global percpu data maps.
>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>
> [ ... ]
>
>> +static void test_global_percpu_data_init(void)
>> +{
>
> [ ... ]
>
>> + err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
>> + elem_sz * num_cpus, 0);
>> + if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
>> + goto out;
>> +
>> + for (i = 0; i < num_online; i++) {
>> + if (!online[i])
>> + continue;
>> +
>> + data = (void *)percpu_data + elem_sz * i;
>
> Will this logic cause an out-of-bounds read on systems with sparse CPU masks?
>
> When reading a per-CPU map using bpf_map__lookup_elem(), the kernel packs the
> per-CPU values densely into the output buffer by iterating over the possible
> CPU mask.
>
> The test allocates num_cpus (the total count of possible CPUs) elements for
> the output buffer, but then iterates over the raw CPU IDs using i and uses i
> as an index into the densely packed array.
>
> On systems with sparse CPU masks (for example, where CPUs 0 and 100 are
> possible/online), num_cpus will be 2. When the loop reaches i = 100, it
> appears it will attempt to access percpu_data + elem_sz * 100, reading past
> the bounds of the allocated memory.
>
> Could we maintain a separate index counter that increments only for possible
> CPUs to match the kernel's dense packing behavior?
>
> [ ... ]
>
>> +static void test_global_percpu_data_lskel(void)
>> +{
>
> [ ... ]
>
>> + err = bpf_map_lookup_elem(map_fd, &key, percpu_data);
>> + if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
>> + goto out;
>> +
>> + for (i = 0; i < num_online; i++) {
>> + if (!online[i])
>> + continue;
>> +
>> + data = (void *)percpu_data + elem_sz * i;
>
> Does this loop have the same bounds checking issue as mentioned above?
>
> If i is a raw CPU ID on a system with a sparse CPU mask, this calculation
> could exceed the bounds of the percpu_data buffer.
>
Yep, agree with the review.
Instead of looking up values across all possible CPUs, it should look up
the value on a specified CPU using BPF_F_CPU, because the test only runs
on online CPUs.
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH bpf-next v4 8/8] selftests/bpf: Add a test to verify bpf_iter for global percpu data
2026-04-14 13:24 [PATCH bpf-next v4 0/8] bpf: Introduce global percpu data Leon Hwang
` (6 preceding siblings ...)
2026-04-14 13:24 ` [PATCH bpf-next v4 7/8] selftests/bpf: Add tests to verify " Leon Hwang
@ 2026-04-14 13:24 ` Leon Hwang
2026-04-14 22:08 ` sashiko-bot
7 siblings, 1 reply; 26+ messages in thread
From: Leon Hwang @ 2026-04-14 13:24 UTC (permalink / raw)
To: bpf
Cc: ast, andrii, daniel, yonghong.song, song, eddyz87, qmo, dxu,
leon.hwang, kernel-patches-bot
Add a test to verify that it is OK to iterate the percpu_array map used
for global percpu data.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
.../bpf/prog_tests/global_data_init.c | 53 +++++++++++++++++++
.../bpf/progs/test_global_percpu_data.c | 36 +++++++++++++
2 files changed, 89 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 0b0384b6515e..9ff09ff1981c 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -269,6 +269,57 @@ static void test_global_percpu_data_verifier_failure(void)
RUN_TESTS(test_global_percpu_data);
}
+static void test_global_percpu_data_iter(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct test_global_percpu_data *skel;
+ union bpf_iter_link_info linfo = {};
+ struct bpf_link *link = NULL;
+ int fd, num_cpus, len, err;
+ char buf[16];
+
+ num_cpus = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ skel = test_global_percpu_data__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
+ return;
+
+ skel->rodata->num_cpus = num_cpus;
+ skel->percpu->struct_data.nums[6] = 0xc0de;
+
+ err = test_global_percpu_data__load(skel);
+ if (err == -EOPNOTSUPP) {
+ test__skip();
+ goto out;
+ }
+ if (!ASSERT_OK(err, "test_global_percpu_data__load"))
+ goto out;
+
+ linfo.map.map_fd = bpf_map__fd(skel->maps.percpu);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_percpu_data, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto out;
+
+ fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(fd, 0, "bpf_iter_create"))
+ goto out;
+
+ while ((len = read(fd, buf, sizeof(buf))) > 0)
+ do { } while (0);
+ ASSERT_EQ(len, 0, "read iter");
+ ASSERT_TRUE(skel->bss->run_iter, "run_iter");
+ ASSERT_EQ(skel->bss->percpu_data_sum, 0xc0de * num_cpus, "percpu_data_sum");
+
+ close(fd);
+out:
+ bpf_link__destroy(link);
+ test_global_percpu_data__destroy(skel);
+}
+
void test_global_percpu_data(void)
{
if (!feat_supported(NULL, FEAT_PERCPU_DATA)) {
@@ -282,4 +333,6 @@ void test_global_percpu_data(void)
test_global_percpu_data_lskel();
if (test__start_subtest("verifier_failure"))
test_global_percpu_data_verifier_failure();
+ if (test__start_subtest("iter"))
+ test_global_percpu_data_iter();
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
index 947721c21f30..0ddb820509eb 100644
--- a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
@@ -63,4 +63,40 @@ int verifier_percpu_read(void *ctx)
return c == 'd';
}
+volatile const __u32 num_cpus = 0;
+__u32 percpu_data_sum = 0;
+bool run_iter = false;
+
+SEC("iter/bpf_map_elem")
+__auxiliary
+int dump_percpu_data(struct bpf_iter__bpf_map_elem *ctx)
+{
+ struct {
+ int data;
+ char run;
+ struct {
+ char set;
+ int i;
+ int nums[7];
+ } struct_data;
+ int nums[7];
+ } *pptr = ctx->value;
+ __u32 step;
+ int i;
+
+ if (!pptr)
+ return 0;
+
+ run_iter = true;
+
+ /* percpu array element size is aligned to 8 */
+ step = (sizeof(*pptr) + 7) & ~7;
+
+ for (i = 0; i < num_cpus; i++) {
+ percpu_data_sum += pptr->struct_data.nums[6];
+ pptr = (void *)pptr + step;
+ }
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
--
2.53.0
^ permalink raw reply related [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 8/8] selftests/bpf: Add a test to verify bpf_iter for global percpu data
2026-04-14 13:24 ` [PATCH bpf-next v4 8/8] selftests/bpf: Add a test to verify bpf_iter for " Leon Hwang
@ 2026-04-14 22:08 ` sashiko-bot
2026-04-17 2:17 ` Leon Hwang
0 siblings, 1 reply; 26+ messages in thread
From: sashiko-bot @ 2026-04-14 22:08 UTC (permalink / raw)
To: Leon Hwang; +Cc: bpf
Sashiko AI review found 3 potential issue(s):
- [Medium] The `dump_percpu_data` program is autoloaded by default, breaking existing subtests on older kernels.
- [Medium] Checking `err == -EOPNOTSUPP` after skeleton load incorrectly handles unsupported kernels.
- [Medium] Casting the global datasec to an anonymous C struct relies on fragile compiler layout assumptions.
--
commit 1e00e3e2b318918e67a2c7d2e251132e1b985484
Author: Leon Hwang <leon.hwang@linux.dev>
selftests/bpf: Add a test to verify bpf_iter for global percpu data
Add a test to verify that it is OK to iter the percpu_array map used for
global percpu data.
> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> index 0b0384b6515ed..9ff09ff1981c4 100644
> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
[ ... ]
> @@ -269,6 +269,57 @@ static void test_global_percpu_data_verifier_failure(void)
> +static void test_global_percpu_data_iter(void)
> +{
[ ... ]
> + err = test_global_percpu_data__load(skel);
> + if (err == -EOPNOTSUPP) {
> + test__skip();
> + goto out;
> + }
Will this correctly skip on older kernels? When the kernel verifier
rejects an unsupported program type or feature during bpf_prog_load(),
it typically returns -EINVAL (or -EACCES), not -EOPNOTSUPP.
Libbpf passes this error code back to the caller, so this condition
might evaluate to false, causing the test to fail on the subsequent
ASSERT_OK(err) check instead of gracefully skipping.
[ ... ]
> diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> index 947721c21f30c..0ddb820509ebf 100644
> --- a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> +++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
[ ... ]
> +SEC("iter/bpf_map_elem")
> +__auxiliary
> +int dump_percpu_data(struct bpf_iter__bpf_map_elem *ctx)
Will this break existing subtests on older kernels? The bpf program
dump_percpu_data is defined with SEC("iter/bpf_map_elem") without the
? prefix, meaning libbpf will attempt to autoload it whenever
test_global_percpu_data__load() is called.
Since the existing init and lskel subtests also load the full skeleton,
they might inadvertently attempt to load this new iterator program.
Should this use SEC("?iter/bpf_map_elem") to prevent autoloading by
default, and only be explicitly autoloaded in the iter subtest?
> +{
> + struct {
> + int data;
> + char run;
> + struct {
> + char set;
> + int i;
> + int nums[7];
> + } struct_data;
> + int nums[7];
> + } *pptr = ctx->value;
Is it safe to cast the global datasec to an anonymous C struct like
this?
In the BPF source, these are declared as independent global variables.
The layout of these variables in the .percpu ELF section is determined
entirely by the compiler. While the current compiler version might emit
the uninitialized nums array after the initialized variables (matching
the struct layout), this relies on undocumented compiler behavior.
If the compiler alters the layout, or if variables are added or
initialized differently in the future, the field offsets will mismatch.
This could cause the step calculation to diverge from the map's actual
value_size, leading to out-of-bounds reads.
Would it be safer to group these variables into a single explicit struct
in the BPF code to guarantee their memory layout?
[ ... ]
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260414132421.63409-1-leon.hwang@linux.dev?part=8
^ permalink raw reply [flat|nested] 26+ messages in thread* Re: [PATCH bpf-next v4 8/8] selftests/bpf: Add a test to verify bpf_iter for global percpu data
2026-04-14 22:08 ` sashiko-bot
@ 2026-04-17 2:17 ` Leon Hwang
0 siblings, 0 replies; 26+ messages in thread
From: Leon Hwang @ 2026-04-17 2:17 UTC (permalink / raw)
To: sashiko; +Cc: bpf
On 15/4/26 06:08, sashiko-bot@kernel.org wrote:
> Sashiko AI review found 3 potential issue(s):
> - [Medium] The `dump_percpu_data` program is autoloaded by default, breaking existing subtests on older kernels.
> - [Medium] Checking `err == -EOPNOTSUPP` after skeleton load incorrectly handles unsupported kernels.
> - [Medium] Casting the global datasec to an anonymous C struct relies on fragile compiler layout assumptions.
> --
>
> commit 1e00e3e2b318918e67a2c7d2e251132e1b985484
> Author: Leon Hwang <leon.hwang@linux.dev>
>
> selftests/bpf: Add a test to verify bpf_iter for global percpu data
>
> Add a test to verify that it is OK to iter the percpu_array map used for
> global percpu data.
>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> index 0b0384b6515ed..9ff09ff1981c4 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>
> [ ... ]
>
>> @@ -269,6 +269,57 @@ static void test_global_percpu_data_verifier_failure(void)
>> +static void test_global_percpu_data_iter(void)
>> +{
>
> [ ... ]
>
>> + err = test_global_percpu_data__load(skel);
>> + if (err == -EOPNOTSUPP) {
>> + test__skip();
>> + goto out;
>> + }
>
> Will this correctly skip on older kernels? When the kernel verifier
> rejects an unsupported program type or feature during bpf_prog_load(),
> it typically returns -EINVAL (or -EACCES), not -EOPNOTSUPP.
>
> Libbpf passes this error code back to the caller, so this condition
> might evaluate to false, causing the test to fail on the subsequent
> ASSERT_OK(err) check instead of gracefully skipping.
>
This err check should be dropped, because this test won't run when the
kernel does not support global percpu data.
See previous patch:
+void test_global_percpu_data(void)
+{
+ if (!feat_supported(NULL, FEAT_PERCPU_DATA)) {
+ test__skip();
+ return;
+ }
> [ ... ]
>
>> diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>> index 947721c21f30c..0ddb820509ebf 100644
>> --- a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>> +++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>
> [ ... ]
>
>> +SEC("iter/bpf_map_elem")
>> +__auxiliary
>> +int dump_percpu_data(struct bpf_iter__bpf_map_elem *ctx)
>
> Will this break existing subtests on older kernels? The bpf program
> dump_percpu_data is defined with SEC("iter/bpf_map_elem") without the
> ? prefix, meaning libbpf will attempt to autoload it whenever
> test_global_percpu_data__load() is called.
>
> Since the existing init and lskel subtests also load the full skeleton,
> they might inadvertently attempt to load this new iterator program.
> Should this use SEC("?iter/bpf_map_elem") to prevent autoloading by
> default, and only be explicitly autoloaded in the iter subtest?
>
Since older kernels do not have global percpu data support, this test
won't run on them. See aforementioned explanation.
>> +{
>> + struct {
>> + int data;
>> + char run;
>> + struct {
>> + char set;
>> + int i;
>> + int nums[7];
>> + } struct_data;
>> + int nums[7];
>> + } *pptr = ctx->value;
>
> Is it safe to cast the global datasec to an anonymous C struct like
> this?
>
> In the BPF source, these are declared as independent global variables.
> The layout of these variables in the .percpu ELF section is determined
> entirely by the compiler. While the current compiler version might emit
> the uninitialized nums array after the initialized variables (matching
> the struct layout), this relies on undocumented compiler behavior.
>
> If the compiler alters the layout, or if variables are added or
> initialized differently in the future, the field offsets will mismatch.
> This could cause the step calculation to diverge from the map's actual
> value_size, leading to out-of-bounds reads.
>
> Would it be safer to group these variables into a single explicit struct
> in the BPF code to guarantee their memory layout?
>
> [ ... ]
>
Correct, I should not use such an anonymous struct, even if it matches
the generated skeleton's embedded struct.
A better approach is to locate the int by offset, which will match its
offset in the generated skeleton's embedded struct.
Thanks,
Leon
^ permalink raw reply [flat|nested] 26+ messages in thread