From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, andrii@kernel.org, daniel@iogearbox.net,
jolsa@kernel.org, yonghong.song@linux.dev, song@kernel.org,
eddyz87@gmail.com, dxu@dxuuu.xyz, deso@posteo.net,
leon.hwang@linux.dev, kernel-patches-bot@fb.com
Subject: [PATCH bpf-next v9 4/7] bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_hash and lru_percpu_hash maps
Date: Tue, 30 Sep 2025 23:39:39 +0800 [thread overview]
Message-ID: <20250930153942.41781-5-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250930153942.41781-1-leon.hwang@linux.dev>
Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash
maps to allow updating values for all CPUs with a single value for both
update_elem and update_batch APIs.
Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash
maps to allow:
* update value for specified CPU for both update_elem and update_batch
APIs.
* lookup value for specified CPU for both lookup_elem and lookup_batch
APIs.
The BPF_F_CPU flag is passed via:
* map_flags along with embedded cpu info.
* elem_flags along with embedded cpu info.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/bpf.h | 4 ++-
kernel/bpf/hashtab.c | 82 +++++++++++++++++++++++++++++---------------
kernel/bpf/syscall.c | 2 +-
3 files changed, 59 insertions(+), 29 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 31498b445b447..b3d9a584f34e2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2717,7 +2717,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
@@ -3772,6 +3772,8 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
{
switch (map_type) {
case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
return true;
default:
return false;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index c2fcd0cd51e51..6fb28ba2406c4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -945,24 +945,32 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
}
static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
- void *value, bool onallcpus)
+ void *value, bool onallcpus, u64 map_flags)
{
if (!onallcpus) {
/* copy true value_size bytes */
copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
} else {
- u32 size = round_up(htab->map.value_size, 8);
- int off = 0, cpu;
+ u32 size = (map_flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) ?
+ htab->map.value_size : round_up(htab->map.value_size, 8);
+ void *ptr;
+ int cpu;
+
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ memcpy(per_cpu_ptr(pptr, cpu), value, size);
+ return;
+ }
for_each_possible_cpu(cpu) {
- copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
- off += size;
+ ptr = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
+ memcpy(per_cpu_ptr(pptr, cpu), ptr, size);
}
}
}
static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
- void *value, bool onallcpus)
+ void *value, bool onallcpus, u64 map_flags)
{
/* When not setting the initial value on all cpus, zero-fill element
* values for other cpus. Otherwise, bpf program has no way to ensure
@@ -980,7 +988,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
}
} else {
- pcpu_copy_value(htab, pptr, value, onallcpus);
+ pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
}
}
@@ -992,7 +1000,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
bool percpu, bool onallcpus,
- struct htab_elem *old_elem)
+ struct htab_elem *old_elem, u64 map_flags)
{
u32 size = htab->map.value_size;
bool prealloc = htab_is_prealloc(htab);
@@ -1050,7 +1058,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = *(void __percpu **)ptr;
}
- pcpu_init_value(htab, pptr, value, onallcpus);
+ pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
if (!prealloc)
htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1155,7 +1163,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
}
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
- l_old);
+ l_old, map_flags);
if (IS_ERR(l_new)) {
/* all pre-allocated elements are in use or memory exhausted */
ret = PTR_ERR(l_new);
@@ -1271,9 +1279,11 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
u32 key_size, hash;
int ret;
- if (unlikely(map_flags > BPF_EXIST))
+ if (unlikely(!onallcpus && map_flags > BPF_EXIST))
/* unknown flags */
return -EINVAL;
+ if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
+ return -EINVAL;
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());
@@ -1299,7 +1309,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
/* Update value in-place */
if (percpu) {
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
- value, onallcpus);
+ value, onallcpus, map_flags);
} else {
void **inner_map_pptr = htab_elem_value(l_old, key_size);
@@ -1308,7 +1318,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
}
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, percpu, onallcpus, NULL);
+ hash, percpu, onallcpus, NULL, map_flags);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
@@ -1334,9 +1344,11 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
u32 key_size, hash;
int ret;
- if (unlikely(map_flags > BPF_EXIST))
+ if (unlikely(!onallcpus && map_flags > BPF_EXIST))
/* unknown flags */
return -EINVAL;
+ if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
+ return -EINVAL;
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());
@@ -1374,10 +1386,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
/* per-cpu hash map can update value in-place */
pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
- value, onallcpus);
+ value, onallcpus, map_flags);
} else {
pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
- value, onallcpus);
+ value, onallcpus, map_flags);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
}
@@ -1689,9 +1701,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size, map_id;
+ u64 elem_map_flags, map_flags, allowed_flags;
u32 bucket_cnt, total, key_size, value_size;
struct htab_elem *node_to_free = NULL;
- u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
unsigned long flags = 0;
@@ -1701,9 +1713,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
int ret = 0;
elem_map_flags = attr->batch.elem_flags;
- if ((elem_map_flags & ~BPF_F_LOCK) ||
- ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
- return -EINVAL;
+ allowed_flags = BPF_F_LOCK;
+ if (!do_delete && is_percpu)
+ allowed_flags |= BPF_F_CPU;
+ ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags);
+ if (ret)
+ return ret;
map_flags = attr->batch.flags;
if (map_flags)
@@ -1726,7 +1741,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
key_size = htab->map.key_size;
value_size = htab->map.value_size;
size = round_up(value_size, 8);
- if (is_percpu)
+ if (is_percpu && !(elem_map_flags & BPF_F_CPU))
value_size = size * num_possible_cpus();
total = 0;
/* while experimenting with hash tables with sizes ranging from 10 to
@@ -1809,10 +1824,17 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __percpu *pptr;
pptr = htab_elem_get_ptr(l, map->key_size);
- for_each_possible_cpu(cpu) {
- copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
- check_and_init_map_value(&htab->map, dst_val + off);
- off += size;
+ if (elem_map_flags & BPF_F_CPU) {
+ cpu = elem_map_flags >> 32;
+ copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, dst_val);
+ } else {
+ for_each_possible_cpu(cpu) {
+ copy_map_value_long(&htab->map, dst_val + off,
+ per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, dst_val + off);
+ off += size;
+ }
}
} else {
value = htab_elem_value(l, key_size);
@@ -2368,7 +2390,7 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
return NULL;
}
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
{
struct htab_elem *l;
void __percpu *pptr;
@@ -2385,16 +2407,22 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
l = __htab_map_lookup_elem(map, key);
if (!l)
goto out;
+ ret = 0;
/* We do not mark LRU map element here in order to not mess up
* eviction heuristics when user space does a map walk.
*/
pptr = htab_elem_get_ptr(l, map->key_size);
+ if (map_flags & BPF_F_CPU) {
+ cpu = map_flags >> 32;
+ copy_map_value(map, value, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value);
+ goto out;
+ }
for_each_possible_cpu(cpu) {
copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
check_and_init_map_value(map, value + off);
off += size;
}
- ret = 0;
out:
rcu_read_unlock();
return ret;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f8de1433fdffb..ce525a474656a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -316,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
bpf_disable_instrumentation();
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
- err = bpf_percpu_hash_copy(map, key, value);
+ err = bpf_percpu_hash_copy(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
--
2.51.0
next prev parent reply other threads:[~2025-09-30 15:40 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-30 15:39 [PATCH bpf-next v9 0/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps Leon Hwang
2025-09-30 15:39 ` [PATCH bpf-next v9 1/7] bpf: Introduce internal bpf_map_check_op_flags helper function Leon Hwang
2025-09-30 15:39 ` [PATCH bpf-next v9 2/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags Leon Hwang
2025-09-30 15:39 ` [PATCH bpf-next v9 3/7] bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_array maps Leon Hwang
2025-09-30 15:39 ` Leon Hwang [this message]
2025-10-06 22:29 ` [PATCH bpf-next v9 4/7] bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_hash and lru_percpu_hash maps Andrii Nakryiko
2025-10-08 4:48 ` Leon Hwang
2025-10-13 23:17 ` Andrii Nakryiko
2025-09-30 15:39 ` [PATCH bpf-next v9 5/7] bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_cgroup_storage maps Leon Hwang
2025-10-06 22:33 ` Andrii Nakryiko
2025-10-08 5:31 ` Leon Hwang
2025-09-30 15:39 ` [PATCH bpf-next v9 6/7] libbpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu maps Leon Hwang
2025-09-30 15:39 ` [PATCH bpf-next v9 7/7] selftests/bpf: Add cases to test BPF_F_CPU and BPF_F_ALL_CPUS flags Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250930153942.41781-5-leon.hwang@linux.dev \
--to=leon.hwang@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=deso@posteo.net \
--cc=dxu@dxuuu.xyz \
--cc=eddyz87@gmail.com \
--cc=jolsa@kernel.org \
--cc=kernel-patches-bot@fb.com \
--cc=song@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.