Re: [PATCH bpf-next v2 1/3] bpf: Introduce BPF_F_CPU flag for percpu_array maps

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Leon Hwang" <leon.hwang@linux.dev>
To: "Jiri Olsa" <olsajiri@gmail.com>
Cc: <bpf@vger.kernel.org>, <ast@kernel.org>, <andrii@kernel.org>,
	<daniel@iogearbox.net>, <yonghong.song@linux.dev>,
	<song@kernel.org>, <eddyz87@gmail.com>, <dxu@dxuuu.xyz>,
	<deso@posteo.net>, <kernel-patches-bot@fb.com>
Subject: Re: [PATCH bpf-next v2 1/3] bpf: Introduce BPF_F_CPU flag for percpu_array maps
Date: Fri, 08 Aug 2025 00:26:55 +0800	[thread overview]
Message-ID: <DBWC4H7DD274.1UTL72GTCJ355@linux.dev> (raw)
In-Reply-To: <aJRlKj0UjdtDM6SU@krava>

On Thu Aug 7, 2025 at 4:34 PM +08, Jiri Olsa wrote:
> On Wed, Aug 06, 2025 at 12:30:15AM +0800, Leon Hwang wrote:
>> Introduce support for the BPF_F_CPU flag in percpu_array maps to allow
>> updating values for specified CPU or for all CPUs with a single value.
>>

[...]

>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>> index 3d080916faf97..98759f0b22397 100644
>> --- a/kernel/bpf/arraymap.c
>> +++ b/kernel/bpf/arraymap.c
>> @@ -295,17 +295,24 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
>>  	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
>>  }
>>
>> -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
>> +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags)
>>  {
>>  	struct bpf_array *array = container_of(map, struct bpf_array, map);
>>  	u32 index = *(u32 *)key;
>>  	void __percpu *pptr;
>> -	int cpu, off = 0;
>> -	u32 size;
>> +	u32 size, cpu;
>> +	int off = 0;
>>
>>  	if (unlikely(index >= array->map.max_entries))
>>  		return -ENOENT;
>>
>> +	cpu = flags >> 32;
>> +	flags &= (u32)~0;
>
> is this necessary?
>

It is unnecessary.

I'll remove it and update

    if (unlikely((u32)flags > BPF_F_CPU))
        return -EINVAL;

>> +	if (unlikely(flags > BPF_F_CPU))
>> +		return -EINVAL;
>> +	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
>> +		return -ERANGE;
>
> should we check cpu != BPF_ALL_CPUS in here?
>

No. It is meaningless to support cpu == BPF_ALL_CPUS, because
(flags & BPF_F_CPU) && cpu == BPF_ALL_CPUS is same as ~BPF_F_CPU.

>> +
>>  	/* per_cpu areas are zero-filled and bpf programs can only
>>  	 * access 'value_size' of them, so copying rounded areas
>>  	 * will not leak any kernel data
>> @@ -313,10 +320,15 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
>>  	size = array->elem_size;
>>  	rcu_read_lock();
>>  	pptr = array->pptrs[index & array->index_mask];
>> -	for_each_possible_cpu(cpu) {
>> -		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
>> -		check_and_init_map_value(map, value + off);
>> -		off += size;
>> +	if (flags & BPF_F_CPU) {
>> +		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
>> +		check_and_init_map_value(map, value);
>> +	} else {
>> +		for_each_possible_cpu(cpu) {
>> +			copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
>> +			check_and_init_map_value(map, value + off);
>> +			off += size;
>> +		}
>>  	}
>>  	rcu_read_unlock();
>>  	return 0;
>> @@ -387,13 +399,20 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
>>  	struct bpf_array *array = container_of(map, struct bpf_array, map);
>>  	u32 index = *(u32 *)key;
>>  	void __percpu *pptr;
>> -	int cpu, off = 0;
>> -	u32 size;
>> +	u32 size, cpu;
>> +	int off = 0;
>>
>> -	if (unlikely(map_flags > BPF_EXIST))
>> +	cpu = map_flags >> 32;
>> +	map_flags &= (u32)~0;
>> +	if (unlikely(map_flags > BPF_F_CPU))
>>  		/* unknown flags */
>>  		return -EINVAL;
>>
>> +	if (unlikely((map_flags & BPF_F_CPU) && cpu != BPF_ALL_CPUS &&
>> +		     cpu >= num_possible_cpus()))
>> +		/* invalid cpu */
>> +		return -ERANGE;
>
> looks like same check as in bpf_percpu_array_copy, maybe we could add
> some helper function for that?
>

If they are same, I'd like to add a helper function.

>> +
>>  	if (unlikely(index >= array->map.max_entries))
>>  		/* all elements were pre-allocated, cannot insert a new one */
>>  		return -E2BIG;
>> @@ -411,10 +430,19 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
>>  	size = array->elem_size;
>>  	rcu_read_lock();
>>  	pptr = array->pptrs[index & array->index_mask];
>> -	for_each_possible_cpu(cpu) {
>> -		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
>> +	if ((map_flags & BPF_F_CPU) && cpu != BPF_ALL_CPUS) {
>> +		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
>>  		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
>> -		off += size;
>> +	} else {
>> +		for_each_possible_cpu(cpu) {
>> +			copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
>> +			/* same user-provided value is used if BPF_F_CPU is specified,
>> +			 * otherwise value is an array of per-cpu values.
>> +			 */
>> +			if (!(map_flags & BPF_F_CPU))
>> +				off += size;
>> +			bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
>> +		}
>>  	}
>>  	rcu_read_unlock();
>>  	return 0;
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 0fbfa8532c392..43f19d02bc5ce 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -131,8 +131,11 @@ bool bpf_map_write_active(const struct bpf_map *map)
>>  	return atomic64_read(&map->writecnt) != 0;
>>  }
>>
>> -static u32 bpf_map_value_size(const struct bpf_map *map)
>> +static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
>>  {
>> +	if ((flags & BPF_F_CPU) && map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
>> +		return round_up(map->value_size, 8);
>> +
>
> nit, maybe we could keep the same style like below and check the map
> type first:
>
> 	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY && (flags & BPF_F_CPU))
> 		return round_up(map->value_size, 8);
> 	else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
>

Ack.

>>  	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
>>  	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
>> @@ -314,7 +317,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
>>  	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
>>  		err = bpf_percpu_hash_copy(map, key, value);
>>  	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
>> -		err = bpf_percpu_array_copy(map, key, value);
>> +		err = bpf_percpu_array_copy(map, key, value, flags);
>>  	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
>>  		err = bpf_percpu_cgroup_storage_copy(map, key, value);
>>  	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
>> @@ -1669,7 +1672,10 @@ static int map_lookup_elem(union bpf_attr *attr)
>>  	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
>>  		return -EINVAL;
>>
>> -	if (attr->flags & ~BPF_F_LOCK)
>> +	if ((u32)attr->flags & ~(BPF_F_LOCK | BPF_F_CPU))
>> +		return -EINVAL;
>
> I understand the u32 cast in here..
>
>> +
>> +	if (!((u32)attr->flags & BPF_F_CPU) && attr->flags >> 32)
>>  		return -EINVAL;
>
> .. but do we need it in here and other similar places below?
>

You are right. They are unnecessary.

I will remove them in next revision.

>>
>>  	CLASS(fd, f)(attr->map_fd);
>> @@ -1679,7 +1685,7 @@ static int map_lookup_elem(union bpf_attr *attr)
>>  	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
>>  		return -EPERM;
>>
>> -	if ((attr->flags & BPF_F_LOCK) &&
>> +	if (((u32)attr->flags & BPF_F_LOCK) &&
>>  	    !btf_record_has_field(map->record, BPF_SPIN_LOCK))
>>  		return -EINVAL;
>>
>> @@ -1687,7 +1693,7 @@ static int map_lookup_elem(union bpf_attr *attr)
>>  	if (IS_ERR(key))
>>  		return PTR_ERR(key);
>>
>> -	value_size = bpf_map_value_size(map);
>> +	value_size = bpf_map_value_size(map, attr->flags);
>>
>>  	err = -ENOMEM;
>>  	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
>> @@ -1744,19 +1750,24 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
>>  		goto err_put;
>>  	}
>>
>> -	if ((attr->flags & BPF_F_LOCK) &&
>> +	if (((u32)attr->flags & BPF_F_LOCK) &&
>>  	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
>>  		err = -EINVAL;
>>  		goto err_put;
>>  	}
>>
>> +	if (!((u32)attr->flags & BPF_F_CPU) && attr->flags >> 32) {
>> +		err = -EINVAL;
>> +		goto err_put;
>> +	}
>> +
>>  	key = ___bpf_copy_key(ukey, map->key_size);
>>  	if (IS_ERR(key)) {
>>  		err = PTR_ERR(key);
>>  		goto err_put;
>>  	}
>>
>> -	value_size = bpf_map_value_size(map);
>> +	value_size = bpf_map_value_size(map, attr->flags);
>>  	value = kvmemdup_bpfptr(uvalue, value_size);
>>  	if (IS_ERR(value)) {
>>  		err = PTR_ERR(value);
>> @@ -1942,6 +1953,25 @@ int generic_map_delete_batch(struct bpf_map *map,
>>  	return err;
>>  }
>>
>> +static int check_map_batch_elem_flags(struct bpf_map *map, u64 elem_flags)
>> +{
>> +	u32 flags = elem_flags;
>> +
>> +	if (flags & ~(BPF_F_LOCK | BPF_F_CPU))
>> +		return -EINVAL;
>> +
>> +	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
>> +		return -EINVAL;
>> +
>> +	if (!(flags & BPF_F_CPU) && elem_flags >> 32)
>> +		return -EINVAL;
>> +
>> +	if ((flags & BPF_F_CPU) && map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
>> +		return -EINVAL;
>> +
>> +	return 0;
>> +}
>
> it seems like this check could be used also for non-batch functions as well?
>
> also it might be more readable if we factor some check_flags function in
> separate patch and then add BPF_F_CPU support
>

Sure. After doing a poc of adding check_flags helper function, this
check can be used also for non-batch functions.

>
>> +
>>  int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
>>  			     const union bpf_attr *attr,
>>  			     union bpf_attr __user *uattr)
>> @@ -1952,15 +1982,11 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
>>  	void *key, *value;
>>  	int err = 0;
>>
>> -	if (attr->batch.elem_flags & ~BPF_F_LOCK)
>> -		return -EINVAL;
>> -
>> -	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
>> -	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
>> -		return -EINVAL;
>> -	}
>> +	err = check_map_batch_elem_flags(map, attr->batch.elem_flags);
>> +	if (err)
>> +		return err;
>>
>> -	value_size = bpf_map_value_size(map);
>> +	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
>>
>>  	max_count = attr->batch.count;
>>  	if (!max_count)
>> @@ -1986,9 +2012,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
>>  		    copy_from_user(value, values + cp * value_size, value_size))
>>  			break;
>>
>> -		err = bpf_map_update_value(map, map_file, key, value,
>> -					   attr->batch.elem_flags);
>> -
>> +		err = bpf_map_update_value(map, map_file, key, value, attr->batch.elem_flags);
>
> there's no change in here right? I'd keep it as it is
>

Ack.

>>  		if (err)
>>  			break;
>>  		cond_resched();
>> @@ -2015,14 +2039,11 @@ int generic_map_lookup_batch(struct bpf_map *map,
>>  	u32 value_size, cp, max_count;
>>  	int err;
>>
>> -	if (attr->batch.elem_flags & ~BPF_F_LOCK)
>> -		return -EINVAL;
>> -
>> -	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
>> -	    !btf_record_has_field(map->record, BPF_SPIN_LOCK))
>> -		return -EINVAL;
>> +	err = check_map_batch_elem_flags(map, attr->batch.elem_flags);
>> +	if (err)
>> +		return err;
>>
>> -	value_size = bpf_map_value_size(map);
>> +	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
>>
>>  	max_count = attr->batch.count;
>>  	if (!max_count)
>> @@ -2056,9 +2077,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
>>  		rcu_read_unlock();
>>  		if (err)
>>  			break;
>> -		err = bpf_map_copy_value(map, key, value,
>> -					 attr->batch.elem_flags);
>> -
>> +		err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags);
>
> ditto
>

Ack.

>
> thanks,
> jirka
>

Thanks,
Leon

[...]

next prev parent reply	other threads:[~2025-08-07 16:27 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-05 16:30 [PATCH bpf-next v2 0/3] bpf: Introduce BPF_F_CPU flag for percpu_array maps Leon Hwang
2025-08-05 16:30 ` [PATCH bpf-next v2 1/3] " Leon Hwang
2025-08-07  8:34   ` Jiri Olsa
2025-08-07 16:26     ` Leon Hwang [this message]
2025-08-07 17:20   ` Alexei Starovoitov
2025-08-08 16:11     ` Leon Hwang
2025-08-08 16:23       ` Alexei Starovoitov
2025-08-11 16:34         ` Leon Hwang
2025-08-05 16:30 ` [PATCH bpf-next v2 2/3] libbpf: Support BPF_F_CPU " Leon Hwang
2025-08-05 16:30 ` [PATCH bpf-next v2 3/3] selftests/bpf: Add case to test BPF_F_CPU Leon Hwang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DBWC4H7DD274.1UTL72GTCJ355@linux.dev \
    --to=leon.hwang@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=deso@posteo.net \
    --cc=dxu@dxuuu.xyz \
    --cc=eddyz87@gmail.com \
    --cc=kernel-patches-bot@fb.com \
    --cc=olsajiri@gmail.com \
    --cc=song@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.