All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, andrii@kernel.org, daniel@iogearbox.net,
	Leon Hwang <leon.hwang@linux.dev>
Subject: [RFC PATCH bpf-next 1/3] bpf: Introduce BPF_F_CPU flag for percpu_array map
Date: Wed, 25 Jun 2025 00:53:52 +0800	[thread overview]
Message-ID: <20250624165354.27184-2-leon.hwang@linux.dev> (raw)
In-Reply-To: <20250624165354.27184-1-leon.hwang@linux.dev>

This patch introduces support for the BPF_F_CPU flag in percpu_array maps.
With the flag set, user space can look up or update the value of one
specific CPU, or update the values of all CPUs with a single value.

This enhancement enables:

* Efficient update of all CPUs using a single value when
  cpu == BPF_ALL_CPU (0xFFFFFFFF).
* Targeted update or lookup for a specific CPU otherwise.

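The flag is passed, together with the new cpu field, via:

* flags for single-element operations, which reach
  bpf_percpu_array_update() (as map_flags) and bpf_percpu_array_copy().
* elem_flags for batch operations, in generic_map_update_batch() and
  generic_map_lookup_batch().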

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h            |  5 +--
 include/uapi/linux/bpf.h       |  6 ++++
 kernel/bpf/arraymap.c          | 46 ++++++++++++++++++++++++----
 kernel/bpf/syscall.c           | 56 ++++++++++++++++++++++------------
 tools/include/uapi/linux/bpf.h |  6 ++++
 5 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5dd556e89cce..4f4cac6c6b84 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2628,11 +2628,12 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 				   struct bpf_func_state *callee);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
+			  u64 flags, u32 cpu);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 			   u64 flags);
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
-			    u64 flags);
+			    u64 flags, u32 cpu);
 
 int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 39e7818cca80..a602c45149eb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1359,8 +1359,12 @@ enum {
 	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
 	BPF_EXIST	= 2, /* update existing element */
 	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* map_update for percpu_array */
 };
 
+/* indicate updating value on all CPUs for percpu maps. */
+#define BPF_ALL_CPU	0xFFFFFFFF
+
 /* flags for BPF_MAP_CREATE command */
 enum {
 	BPF_F_NO_PREALLOC	= (1U << 0),
@@ -1514,6 +1518,7 @@ union bpf_attr {
 			__aligned_u64 next_key;
 		};
 		__u64		flags;
+		__u32		cpu;
 	};
 
 	struct { /* struct used by BPF_MAP_*_BATCH commands */
@@ -1531,6 +1536,7 @@ union bpf_attr {
 		__u32		map_fd;
 		__u64		elem_flags;
 		__u64		flags;
+		__u32		cpu;
 	} batch;
 
 	struct { /* anonymous struct used by BPF_PROG_LOAD command */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index eb28c0f219ee..290462a2b1b9 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -295,22 +295,40 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
 	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
 }
 
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
+			  u64 flags, u32 cpu)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
-	int cpu, off = 0;
+	int off = 0;
 	u32 size;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -ENOENT;
 
+	if (unlikely(flags > BPF_F_CPU))
+		/* unknown flags */
+		return -EINVAL;
+
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
 	 * will not leak any kernel data
 	 */
 	size = array->elem_size;
+
+	if (flags & BPF_F_CPU) {
+		if (cpu >= num_possible_cpus())
+			return -E2BIG;
+
+		rcu_read_lock();
+		pptr = array->pptrs[index & array->index_mask];
+		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+		check_and_init_map_value(map, value);
+		rcu_read_unlock();
+		return 0;
+	}
+
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
@@ -382,15 +400,16 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
 }
 
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
-			    u64 map_flags)
+			    u64 map_flags, u32 cpu)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
-	int cpu, off = 0;
+	bool reuse_value;
+	int off = 0;
 	u32 size;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely(map_flags > BPF_F_CPU))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -409,10 +428,25 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	 * so no kernel data leaks possible
 	 */
 	size = array->elem_size;
+
+	if ((map_flags & BPF_F_CPU) && cpu != BPF_ALL_CPU) {
+		if (cpu >= num_possible_cpus())
+			return -E2BIG;
+
+		rcu_read_lock();
+		pptr = array->pptrs[index & array->index_mask];
+		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
+		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
+		rcu_read_unlock();
+		return 0;
+	}
+
+	reuse_value = (map_flags & BPF_F_CPU) && cpu == BPF_ALL_CPU;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+		copy_map_value_long(map, per_cpu_ptr(pptr, cpu),
+				    reuse_value ? value : value + off);
 		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
 		off += size;
 	}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 56500381c28a..cdff7830baee 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -241,7 +241,7 @@ static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj)
 }
 
 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
-				void *key, void *value, __u64 flags)
+				void *key, void *value, __u64 flags, __u32 cpu)
 {
 	int err;
 
@@ -265,7 +265,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_update(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-		err = bpf_percpu_array_update(map, key, value, flags);
+		err = bpf_percpu_array_update(map, key, value, flags, cpu);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_update(map, key, value,
 						       flags);
@@ -299,7 +299,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
 }
 
 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
-			      __u64 flags)
+			      __u64 flags, __u32 cpu)
 {
 	void *ptr;
 	int err;
@@ -312,7 +312,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-		err = bpf_percpu_array_copy(map, key, value);
+		err = bpf_percpu_array_copy(map, key, value, flags, cpu);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
@@ -1648,7 +1648,7 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD cpu
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
@@ -1662,7 +1662,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
-	if (attr->flags & ~BPF_F_LOCK)
+	if (attr->flags & ~(BPF_F_LOCK | BPF_F_CPU))
 		return -EINVAL;
 
 	CLASS(fd, f)(attr->map_fd);
@@ -1691,11 +1691,11 @@ static int map_lookup_elem(union bpf_attr *attr)
 		if (copy_from_user(value, uvalue, value_size))
 			err = -EFAULT;
 		else
-			err = bpf_map_copy_value(map, key, value, attr->flags);
+			err = bpf_map_copy_value(map, key, value, attr->flags, attr->cpu);
 		goto free_value;
 	}
 
-	err = bpf_map_copy_value(map, key, value, attr->flags);
+	err = bpf_map_copy_value(map, key, value, attr->flags, attr->cpu);
 	if (err)
 		goto free_value;
 
@@ -1713,7 +1713,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 }
 
 
-#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
+#define BPF_MAP_UPDATE_ELEM_LAST_FIELD cpu
 
 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 {
@@ -1756,7 +1756,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto free_key;
 	}
 
-	err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags);
+	err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags, attr->cpu);
 	if (!err)
 		maybe_wait_bpf_programs(map);
 
@@ -1941,19 +1941,27 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
 {
 	void __user *values = u64_to_user_ptr(attr->batch.values);
 	void __user *keys = u64_to_user_ptr(attr->batch.keys);
+	u64 elem_flags = attr->batch.elem_flags;
 	u32 value_size, cp, max_count;
 	void *key, *value;
 	int err = 0;
 
-	if (attr->batch.elem_flags & ~BPF_F_LOCK)
+	if (elem_flags & ~(BPF_F_LOCK | BPF_F_CPU))
 		return -EINVAL;
 
-	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+	if ((elem_flags & BPF_F_LOCK) &&
 	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
 		return -EINVAL;
 	}
 
-	value_size = bpf_map_value_size(map);
+	if (elem_flags & BPF_F_CPU) {
+		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+			return -EINVAL;
+
+		value_size = round_up(map->value_size, 8);
+	} else {
+		value_size = bpf_map_value_size(map);
+	}
 
 	max_count = attr->batch.count;
 	if (!max_count)
@@ -1980,7 +1988,8 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
 			break;
 
 		err = bpf_map_update_value(map, map_file, key, value,
-					   attr->batch.elem_flags);
+					   attr->batch.elem_flags,
+					   attr->batch.cpu);
 
 		if (err)
 			break;
@@ -2005,17 +2014,25 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	void __user *values = u64_to_user_ptr(attr->batch.values);
 	void __user *keys = u64_to_user_ptr(attr->batch.keys);
 	void *buf, *buf_prevkey, *prev_key, *key, *value;
+	u64 elem_flags = attr->batch.elem_flags;
 	u32 value_size, cp, max_count;
 	int err;
 
-	if (attr->batch.elem_flags & ~BPF_F_LOCK)
+	if (elem_flags & ~(BPF_F_LOCK | BPF_F_CPU))
 		return -EINVAL;
 
-	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+	if ((elem_flags & BPF_F_LOCK) &&
 	    !btf_record_has_field(map->record, BPF_SPIN_LOCK))
 		return -EINVAL;
 
-	value_size = bpf_map_value_size(map);
+	if (elem_flags & BPF_F_CPU) {
+		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+			return -EINVAL;
+
+		value_size = round_up(map->value_size, 8);
+	} else {
+		value_size = bpf_map_value_size(map);
+	}
 
 	max_count = attr->batch.count;
 	if (!max_count)
@@ -2050,7 +2067,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
 		if (err)
 			break;
 		err = bpf_map_copy_value(map, key, value,
-					 attr->batch.elem_flags);
+					 attr->batch.elem_flags,
+					 attr->batch.cpu);
 
 		if (err == -ENOENT)
 			goto next_key;
@@ -5438,7 +5456,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	return err;
 }
 
-#define BPF_MAP_BATCH_LAST_FIELD batch.flags
+#define BPF_MAP_BATCH_LAST_FIELD batch.cpu
 
 #define BPF_DO_BATCH(fn, ...)			\
 	do {					\
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 39e7818cca80..a602c45149eb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1359,8 +1359,12 @@ enum {
 	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
 	BPF_EXIST	= 2, /* update existing element */
 	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* map_update for percpu_array */
 };
 
+/* indicate updating value on all CPUs for percpu maps. */
+#define BPF_ALL_CPU	0xFFFFFFFF
+
 /* flags for BPF_MAP_CREATE command */
 enum {
 	BPF_F_NO_PREALLOC	= (1U << 0),
@@ -1514,6 +1518,7 @@ union bpf_attr {
 			__aligned_u64 next_key;
 		};
 		__u64		flags;
+		__u32		cpu;
 	};
 
 	struct { /* struct used by BPF_MAP_*_BATCH commands */
@@ -1531,6 +1536,7 @@ union bpf_attr {
 		__u32		map_fd;
 		__u64		elem_flags;
 		__u64		flags;
+		__u32		cpu;
 	} batch;
 
 	struct { /* anonymous struct used by BPF_PROG_LOAD command */
-- 
2.49.0


  reply	other threads:[~2025-06-24 16:54 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-24 16:53 [RFC PATCH bpf-next 0/3] bpf: Introduce BPF_F_CPU flag for percpu_array map Leon Hwang
2025-06-24 16:53 ` Leon Hwang [this message]
2025-07-01 20:22   ` [RFC PATCH bpf-next 1/3] " Andrii Nakryiko
2025-07-02 17:01     ` Leon Hwang
2025-07-02 17:13       ` Andrii Nakryiko
2025-06-24 16:53 ` [RFC PATCH bpf-next 2/3] bpf, libbpf: Support BPF_F_CPU " Leon Hwang
2025-07-01 20:22   ` Andrii Nakryiko
2025-07-02 17:28     ` Leon Hwang
2025-07-02 17:30       ` Andrii Nakryiko
2025-07-02 17:32         ` Leon Hwang
2025-06-24 16:53 ` [RFC PATCH bpf-next 3/3] selftests/bpf: Add case to test BPF_F_CPU Leon Hwang
2025-07-01 20:22   ` Andrii Nakryiko
2025-07-02 17:29     ` Leon Hwang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250624165354.27184-2-leon.hwang@linux.dev \
    --to=leon.hwang@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.