From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, andrii@kernel.org, daniel@iogearbox.net,
jolsa@kernel.org, yonghong.song@linux.dev, song@kernel.org,
eddyz87@gmail.com, dxu@dxuuu.xyz, deso@posteo.net,
leon.hwang@linux.dev, kernel-patches-bot@fb.com
Subject: [PATCH bpf-next v4 2/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags
Date: Thu, 28 Aug 2025 00:45:04 +0800 [thread overview]
Message-ID: <20250827164509.7401-3-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250827164509.7401-1-leon.hwang@linux.dev>
Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags and the following internal
helper functions for percpu maps:
* bpf_percpu_copy_to_user: For lookup_elem and lookup_batch user APIs,
copy data to user-provided value pointer.
* bpf_percpu_copy_from_user: For update_elem and update_batch user APIs,
copy data from user-provided value pointer.
* bpf_map_check_cpu_flags: Check BPF_F_CPU, BPF_F_ALL_CPUS and cpu info in
flags.
Also pass the flags to bpf_map_value_size(), so that these user APIs size the value buffer for a single value when BPF_F_CPU or BPF_F_ALL_CPUS is set.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/bpf.h | 89 ++++++++++++++++++++++++++++++++--
include/uapi/linux/bpf.h | 2 +
kernel/bpf/syscall.c | 24 ++++-----
tools/include/uapi/linux/bpf.h | 2 +
4 files changed, 103 insertions(+), 14 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 512717d442c09..a83364949b64c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -547,6 +547,56 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src
bpf_obj_memcpy(map->record, dst, src, map->value_size, true);
}
+#ifdef CONFIG_BPF_SYSCALL
+/* Lookup side: copy percpu data at @pptr into the buffer @value. */
+static inline void bpf_percpu_copy_to_user(struct bpf_map *map, void __percpu *pptr, void *value,
+					   u32 size, u64 flags)
+{
+	int cpu, off = 0; /* off: byte offset of the current CPU's slot in @value */
+
+	if (flags & BPF_F_CPU) {
+		cpu = flags >> 32; /* requested CPU is in the upper 32 bits */
+		/* per_cpu_ptr() is right for the local CPU too; this_cpu_ptr() would need preemption off */
+		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+		check_and_init_map_value(map, value);
+	} else {
+		for_each_possible_cpu(cpu) { /* one @size-byte slot per possible CPU */
+			copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+			check_and_init_map_value(map, value + off);
+			off += size;
+		}
+	}
+}
+
+void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
+
+/* Update side: write @value into the percpu data at @pptr. BPF_F_CPU targets
+ * only CPU (flags >> 32); otherwise every possible CPU's slot is written.
+ */
+static inline void bpf_percpu_copy_from_user(struct bpf_map *map, void __percpu *pptr, void *value,
+					     u32 size, u64 flags)
+{
+	int cpu, off = 0;
+	void *ptr;
+
+	if (flags & BPF_F_CPU) {
+		cpu = flags >> 32;
+		ptr = per_cpu_ptr(pptr, cpu); /* right for the local CPU too, no this_cpu_ptr() */
+		copy_map_value_long(map, ptr, value);
+		bpf_obj_free_fields(map->record, ptr);
+	} else {
+		for_each_possible_cpu(cpu) {
+			ptr = per_cpu_ptr(pptr, cpu);
+			copy_map_value_long(map, ptr, value + off);
+			/* BPF_F_ALL_CPUS reuses the one value; else @value is a per-cpu array */
+			if (!(flags & BPF_F_ALL_CPUS))
+				off += size;
+			bpf_obj_free_fields(map->record, ptr);
+		}
+	}
+}
+#endif
+
static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src)
{
unsigned long *src_uptr, *dst_uptr;
@@ -2417,7 +2467,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
-void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
struct bpf_map *bpf_map_get(u32 ufd);
@@ -3709,14 +3758,25 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
const char **linep, int *nump);
struct bpf_prog *bpf_prog_find_from_stack(void);
+static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
+{
+ return false; /* no map type accepts BPF_F_CPU/BPF_F_ALL_CPUS here; presumably extended per map type by later patches in the series */
+}
+
static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 extra_flags_mask)
{
- if (extra_flags_mask && (flags & extra_flags_mask))
+ if (extra_flags_mask && ((u32)flags & extra_flags_mask)) /* mask is applied to the low 32 bits only */
return -EINVAL;
if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
return -EINVAL;
+ if (!(flags & BPF_F_CPU) && flags >> 32) /* upper 32 bits must be zero unless BPF_F_CPU is set */
+ return -EINVAL;
+
+ if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_supports_cpu_flags(map->map_type))
+ return -EINVAL; /* CPU flags given for a map type that does not support them */
+
return 0;
}
@@ -3725,7 +3785,7 @@ static inline int bpf_map_check_update_flags(struct bpf_map *map, u64 flags)
return bpf_map_check_op_flags(map, flags, 0);
}
-#define BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK (~BPF_F_LOCK)
+#define BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK (~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS))
static inline int bpf_map_check_lookup_flags(struct bpf_map *map, u64 flags)
{
@@ -3737,4 +3797,27 @@ static inline int bpf_map_check_batch_flags(struct bpf_map *map, u64 flags)
return bpf_map_check_op_flags(map, flags, BPF_MAP_LOOKUP_ELEM_EXTRA_FLAGS_MASK);
}
+static inline int bpf_map_check_cpu_flags(u64 flags, bool check_all_cpus_flag)
+{ /* returns 0, -EINVAL for a rejected flag combination, -ERANGE for a rejected CPU number */
+ const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
+ u32 cpu;
+
+ if (check_all_cpus_flag) {
+ if (unlikely((u32)flags > BPF_F_ALL_CPUS))
+ /* unknown flags; NOTE(review): magnitude compare also rejects BPF_F_ALL_CPUS ORed with any lower flag bit - confirm intended */
+ return -EINVAL;
+ if (unlikely((flags & cpu_flags) == cpu_flags))
+ return -EINVAL; /* BPF_F_CPU and BPF_F_ALL_CPUS are mutually exclusive */
+ } else {
+ if (unlikely((u32)flags & ~BPF_F_CPU))
+ return -EINVAL; /* here BPF_F_CPU is the only bit accepted in the low 32 bits */
+ }
+
+ cpu = flags >> 32; /* CPU number is carried in the upper 32 bits */
+ if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+ return -ERANGE; /* NOTE(review): assumes possible CPU ids are dense in [0, num_possible_cpus()) - confirm */
+
+ return 0;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 233de8677382e..be1fdc5042744 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1372,6 +1372,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+ BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
/* flags for BPF_MAP_CREATE command */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e04b35944a2b..dbd21484d7a4d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -131,12 +131,14 @@ bool bpf_map_write_active(const struct bpf_map *map)
return atomic64_read(&map->writecnt) != 0;
}
-static u32 bpf_map_value_size(const struct bpf_map *map)
-{
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
+{
+ if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS))
+ return round_up(map->value_size, 8);
+ else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
return round_up(map->value_size, 8) * num_possible_cpus();
else if (IS_FD_MAP(map))
return sizeof(u32);
@@ -1684,7 +1686,7 @@ static int map_lookup_elem(union bpf_attr *attr)
if (IS_ERR(key))
return PTR_ERR(key);
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->flags);
err = -ENOMEM;
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1751,7 +1753,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->flags);
value = kvmemdup_bpfptr(uvalue, value_size);
if (IS_ERR(value)) {
err = PTR_ERR(value);
@@ -1951,7 +1953,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
if (err)
return err;
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->batch.elem_flags);
max_count = attr->batch.count;
if (!max_count)
@@ -2010,7 +2012,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
if (err)
return err;
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, attr->batch.elem_flags);
max_count = attr->batch.count;
if (!max_count)
@@ -2132,7 +2134,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
goto err_put;
}
- value_size = bpf_map_value_size(map);
+ value_size = bpf_map_value_size(map, 0);
err = -ENOMEM;
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382e..be1fdc5042744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1372,6 +1372,8 @@ enum {
BPF_NOEXIST = 1, /* create new element if it didn't exist */
BPF_EXIST = 2, /* update existing element */
BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+ BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+ BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
};
/* flags for BPF_MAP_CREATE command */
--
2.50.1
next prev parent reply other threads:[~2025-08-27 16:45 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-27 16:45 [PATCH bpf-next v4 0/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps Leon Hwang
2025-08-27 16:45 ` [PATCH bpf-next v4 1/7] bpf: Introduce internal bpf_map_check_op_flags helper function Leon Hwang
2025-08-27 23:17 ` Andrii Nakryiko
2025-09-03 14:39 ` Leon Hwang
2025-08-27 16:45 ` Leon Hwang [this message]
2025-08-27 23:18 ` [PATCH bpf-next v4 2/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags Andrii Nakryiko
2025-09-03 14:26 ` Leon Hwang
2025-09-03 23:53 ` Andrii Nakryiko
2025-09-04 2:36 ` Leon Hwang
2025-08-27 16:45 ` [PATCH bpf-next v4 3/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu_array maps Leon Hwang
2025-08-27 16:45 ` [PATCH bpf-next v4 4/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu_hash and lru_percpu_hash maps Leon Hwang
2025-08-27 23:18 ` Andrii Nakryiko
2025-09-03 14:30 ` Leon Hwang
2025-08-27 16:45 ` [PATCH bpf-next v4 5/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu_cgroup_storage maps Leon Hwang
2025-08-27 16:45 ` [PATCH bpf-next v4 6/7] libbpf: Support BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps Leon Hwang
2025-08-27 23:18 ` Andrii Nakryiko
2025-09-03 14:33 ` Leon Hwang
2025-08-27 16:45 ` [PATCH bpf-next v4 7/7] selftests/bpf: Add cases to test BPF_F_CPU and BPF_F_ALL_CPUS flags Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250827164509.7401-3-leon.hwang@linux.dev \
--to=leon.hwang@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=deso@posteo.net \
--cc=dxu@dxuuu.xyz \
--cc=eddyz87@gmail.com \
--cc=jolsa@kernel.org \
--cc=kernel-patches-bot@fb.com \
--cc=song@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.