From: Martin KaFai Lau <martin.lau@linux.dev>
To: Dave Marchevsky <davemarchevsky@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Martin KaFai Lau <martin.lau@kernel.org>,
	Kernel Team <kernel-team@fb.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	bpf@vger.kernel.org
Subject: Re: [PATCH v1 bpf-next 1/2] bpf: Support BPF_F_MMAPABLE task_local storage
Date: Mon, 20 Nov 2023 16:42:21 -0800	[thread overview]
Message-ID: <9b037dde-e65c-4d1a-8295-68d51ac3ce25@linux.dev> (raw)
In-Reply-To: <20231120175925.733167-2-davemarchevsky@fb.com>

On 11/20/23 9:59 AM, Dave Marchevsky wrote:
> diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
> index 173ec7f43ed1..114973f925ea 100644
> --- a/include/linux/bpf_local_storage.h
> +++ b/include/linux/bpf_local_storage.h
> @@ -69,7 +69,17 @@ struct bpf_local_storage_data {
>   	 * the number of cachelines accessed during the cache hit case.
>   	 */
>   	struct bpf_local_storage_map __rcu *smap;
> -	u8 data[] __aligned(8);
> +	/* Need to duplicate smap's map_flags as smap may be gone when
> +	 * it's time to free bpf_local_storage_data
> +	 */
> +	u64 smap_map_flags;
> +	/* If BPF_F_MMAPABLE, this is a void * to separately-alloc'd data
> +	 * Otherwise the actual mapval data lives here
> +	 */
> +	union {
> +		DECLARE_FLEX_ARRAY(u8, data) __aligned(8);
> +		void *actual_data __aligned(8);

The pages (that can be mmap'ed later) feel like a specific kind of kptr.

Have you thought about allowing a kptr (pointing to some pages that can be 
mmap'ed later) to be stored as one of the members of the map's value? 
bpf_local_storage_map is one of the map types that supports kptr.

struct normal_and_mmap_value {
     int some_int;
     int __percpu_kptr *some_cnts;

     struct bpf_mmap_page __kptr *some_stats;
};

struct mmap_only_value {
     struct bpf_mmap_page __kptr *some_stats;
};
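
For illustration, a task_local storage prog could then manage the mmap'able 
object like any other kptr. A rough sketch, not tested: bpf_mmap_page and its 
alloc/free kfuncs are hypothetical here, while bpf_task_storage_get() and 
bpf_kptr_xchg() are existing helpers:

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct normal_and_mmap_value);
} task_stats SEC(".maps");

SEC("tp_btf/task_newtask")
int BPF_PROG(on_newtask, struct task_struct *task, u64 clone_flags)
{
	struct normal_and_mmap_value *v;
	struct bpf_mmap_page *stats, *old;

	v = bpf_task_storage_get(&task_stats, task, NULL,
				 BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!v)
		return 0;

	stats = bpf_mmap_page_alloc();		/* hypothetical kfunc */
	if (!stats)
		return 0;

	/* publish the new object; free the old one if there was any */
	old = bpf_kptr_xchg(&v->some_stats, stats);
	if (old)
		bpf_mmap_page_free(old);	/* hypothetical kfunc */
	return 0;
}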

[ ... ]

> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
> index 146824cc9689..9b3becbcc1a3 100644
> --- a/kernel/bpf/bpf_local_storage.c
> +++ b/kernel/bpf/bpf_local_storage.c
> @@ -15,7 +15,8 @@
>   #include <linux/rcupdate_trace.h>
>   #include <linux/rcupdate_wait.h>
>   
> -#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
> +#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK \
> +	(BPF_F_NO_PREALLOC | BPF_F_CLONE | BPF_F_MMAPABLE)
>   
>   static struct bpf_local_storage_map_bucket *
>   select_bucket(struct bpf_local_storage_map *smap,
> @@ -24,6 +25,51 @@ select_bucket(struct bpf_local_storage_map *smap,
>   	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
>   }
>   
> +struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map);
> +
> +void *alloc_mmapable_selem_value(struct bpf_local_storage_map *smap)
> +{
> +	struct mem_cgroup *memcg, *old_memcg;
> +	void *ptr;
> +
> +	memcg = bpf_map_get_memcg(&smap->map);
> +	old_memcg = set_active_memcg(memcg);
> +	ptr = bpf_map_area_mmapable_alloc(PAGE_ALIGN(smap->map.value_size),
> +					  NUMA_NO_NODE);
> +	set_active_memcg(old_memcg);
> +	mem_cgroup_put(memcg);
> +
> +	return ptr;
> +}

[ ... ]

> @@ -76,10 +122,19 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
>   		void *value, bool charge_mem, gfp_t gfp_flags)
>   {
>   	struct bpf_local_storage_elem *selem;
> +	void *mmapable_value = NULL;
> +	u32 selem_mem;
>   
> -	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
> +	selem_mem = selem_bytes_used(smap);
> +	if (charge_mem && mem_charge(smap, owner, selem_mem))
>   		return NULL;
>   
> +	if (smap->map.map_flags & BPF_F_MMAPABLE) {
> +		mmapable_value = alloc_mmapable_selem_value(smap);

This probably is not always safe for a bpf prog to do. Leaving aside that the 
gfp_flags argument is not used here, bpf local storage is moving to BPF's own 
memory allocator because of 
https://lore.kernel.org/bpf/20221118190109.1512674-1-namhyung@kernel.org/
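
For reference, roughly how the selem allocation looks after that series 
(paraphrased from kernel/bpf/bpf_local_storage.c; exact field names may 
differ):

	/* selem comes from the map's bpf_mem_alloc cache, which is safe
	 * to call from the contexts bpf progs run in; a direct page
	 * allocation like bpf_map_area_mmapable_alloc() may sleep and
	 * is not.
	 */
	migrate_disable();
	selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
	migrate_enable();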

> +		if (!mmapable_value)
> +			goto err_out;
> +	}
> +

