From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, andrii@kernel.org, daniel@iogearbox.net,
olsajiri@gmail.com, yonghong.song@linux.dev, song@kernel.org,
eddyz87@gmail.com, dxu@dxuuu.xyz, deso@posteo.net,
leon.hwang@linux.dev, kernel-patches-bot@fb.com
Subject: [PATCH bpf-next v3 2/6] bpf: Introduce BPF_F_CPU flag for percpu_array maps
Date: Fri, 22 Aug 2025 00:08:13 +0800 [thread overview]
Message-ID: <20250821160817.70285-3-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250821160817.70285-1-leon.hwang@linux.dev>
Introduce support for the BPF_F_ALL_CPUS flag in percpu_array maps to
allow updating values for all CPUs with a single value.
Introduce support for the BPF_F_CPU flag in percpu_array maps to allow
updating value for specified CPU.
This enhancement enables:
* Efficient update values across all CPUs with a single value when
BPF_F_ALL_CPUS is set for update_elem and update_batch APIs.
* Targeted update or lookup for a specified CPU when BPF_F_CPU is set.
The BPF_F_CPU flag is passed via:
* map_flags of lookup_elem and update_elem APIs along with embedded cpu
field.
* elem_flags of lookup_batch and update_batch APIs along with embedded
cpu field.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/bpf.h | 3 +-
include/uapi/linux/bpf.h | 2 ++
kernel/bpf/arraymap.c | 56 ++++++++++++++++++++++++++--------
kernel/bpf/syscall.c | 27 ++++++++++------
tools/include/uapi/linux/bpf.h | 2 ++
5 files changed, 67 insertions(+), 23 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8f6e87f0f3a89..b2191b1e455a6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2697,7 +2697,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *callee);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
+ u64 flags);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 233de8677382e..be1fdc5042744 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1372,6 +1372,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+ BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
/* flags for BPF_MAP_CREATE command */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3d080916faf97..1efa730105e24 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -295,17 +295,24 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
void __percpu *pptr;
- int cpu, off = 0;
- u32 size;
+ u32 size, cpu;
+ int off = 0;
if (unlikely(index >= array->map.max_entries))
return -ENOENT;
+ if (unlikely((u32)flags & ~BPF_F_CPU))
+ return -EINVAL;
+
+ cpu = flags >> 32;
+ if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+ return -ERANGE;
+
/* per_cpu areas are zero-filled and bpf programs can only
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
@@ -313,10 +320,15 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
- for_each_possible_cpu(cpu) {
- copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
- check_and_init_map_value(map, value + off);
- off += size;
+ if (flags & BPF_F_CPU) {
+ copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value);
+ } else {
+ for_each_possible_cpu(cpu) {
+ copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value + off);
+ off += size;
+ }
}
rcu_read_unlock();
return 0;
@@ -385,14 +397,22 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
+ const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
u32 index = *(u32 *)key;
void __percpu *pptr;
- int cpu, off = 0;
- u32 size;
+ u32 size, cpu;
+ int off = 0;
- if (unlikely(map_flags > BPF_EXIST))
+ if (unlikely((u32)map_flags > BPF_F_ALL_CPUS))
/* unknown flags */
return -EINVAL;
+ if (unlikely((map_flags & cpu_flags) == cpu_flags))
+ return -EINVAL;
+
+ cpu = map_flags >> 32;
+ if (unlikely((map_flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+ /* invalid cpu */
+ return -ERANGE;
if (unlikely(index >= array->map.max_entries))
/* all elements were pre-allocated, cannot insert a new one */
@@ -411,10 +431,20 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
- for_each_possible_cpu(cpu) {
- copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+ if (map_flags & BPF_F_CPU) {
+ copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
- off += size;
+ } else {
+ for_each_possible_cpu(cpu) {
+ copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+ /* same user-provided value is used if
+ * BPF_F_ALL_CPUS is specified, otherwise value is
+ * an array of per-cpu values.
+ */
+ if (!(map_flags & BPF_F_ALL_CPUS))
+ off += size;
+ bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
+ }
}
rcu_read_unlock();
return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 19f7f5de5e7dc..6251ac9bc7e42 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -131,9 +131,11 @@ bool bpf_map_write_active(const struct bpf_map *map)
return atomic64_read(&map->writecnt) != 0;
}
-static u32 bpf_map_value_size(const struct bpf_map *map)
+static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
{
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY && (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
+ return round_up(map->value_size, 8);
+ else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
@@ -314,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- err = bpf_percpu_array_copy(map, key, value);
+ err = bpf_percpu_array_copy(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
@@ -1656,12 +1658,19 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
static int check_map_flags(struct bpf_map *map, u64 flags, bool check_flag)
{
- if (check_flag && (flags & ~BPF_F_LOCK))
+ if (check_flag && ((u32)flags & ~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)))
return -EINVAL;
if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
return -EINVAL;
+ if (!(flags & BPF_F_CPU) && flags >> 32)
+ return -EINVAL;
+
+ if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+ return -EINVAL;
+
return 0;
}
@@ -1695,7 +1704,7 @@ static int map_lookup_elem(union bpf_attr *attr)
if (IS_ERR(key))
return PTR_ERR(key);
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->flags);
err = -ENOMEM;
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1762,7 +1771,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->flags);
value = kvmemdup_bpfptr(uvalue, value_size);
if (IS_ERR(value)) {
err = PTR_ERR(value);
@@ -1962,7 +1971,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
if (err)
return err;
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->batch.elem_flags);
max_count = attr->batch.count;
if (!max_count)
@@ -2021,7 +2030,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
if (err)
return err;
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->batch.elem_flags);
max_count = attr->batch.count;
if (!max_count)
@@ -2143,7 +2152,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
goto err_put;
}
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, 0);
err = -ENOMEM;
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382e..be1fdc5042744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1372,6 +1372,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+ BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
/* flags for BPF_MAP_CREATE command */
--
2.50.1
next prev parent reply other threads:[~2025-08-21 16:08 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-21 16:08 [PATCH bpf-next v3 0/6] Introduce BPF_F_CPU flag for percpu maps Leon Hwang
2025-08-21 16:08 ` [PATCH bpf-next v3 1/6] bpf: Introduce internal check_map_flags helper function Leon Hwang
2025-08-22 22:14 ` Andrii Nakryiko
2025-08-26 15:24 ` Leon Hwang
2025-08-26 22:50 ` Andrii Nakryiko
2025-08-21 16:08 ` Leon Hwang [this message]
2025-08-22 22:14 ` [PATCH bpf-next v3 2/6] bpf: Introduce BPF_F_CPU flag for percpu_array maps Andrii Nakryiko
2025-08-26 14:45 ` Leon Hwang
2025-08-21 16:08 ` [PATCH bpf-next v3 3/6] bpf: Introduce BPF_F_CPU flag for percpu_hash and lru_percpu_hash maps Leon Hwang
2025-08-22 22:14 ` Andrii Nakryiko
2025-08-26 15:14 ` Leon Hwang
2025-08-21 16:08 ` [PATCH bpf-next v3 4/6] bpf: Introduce BPF_F_CPU flag for percpu_cgroup_storage maps Leon Hwang
2025-08-21 16:08 ` [PATCH bpf-next v3 5/6] libbpf: Support BPF_F_CPU for percpu maps Leon Hwang
2025-08-22 22:20 ` Andrii Nakryiko
2025-08-26 15:35 ` Leon Hwang
2025-08-26 22:50 ` Andrii Nakryiko
2025-08-21 16:08 ` [PATCH bpf-next v3 6/6] selftests/bpf: Add cases to test BPF_F_CPU flag Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250821160817.70285-3-leon.hwang@linux.dev \
--to=leon.hwang@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=deso@posteo.net \
--cc=dxu@dxuuu.xyz \
--cc=eddyz87@gmail.com \
--cc=kernel-patches-bot@fb.com \
--cc=olsajiri@gmail.com \
--cc=song@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.