* [PATCH] bpf: Fix percpu address space issues
@ 2024-08-04 18:55 Uros Bizjak
2024-08-09 8:28 ` Eduard Zingerman
0 siblings, 1 reply; 5+ messages in thread
From: Uros Bizjak @ 2024-08-04 18:55 UTC (permalink / raw)
To: bpf, linux-kernel
Cc: Uros Bizjak, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa
In arraymap.c:
Assign return values of bpf_array_map_seq_start() and
bpf_array_map_seq_next() from the non-percpu array.
Correct the declaration of pptr pointer in __bpf_array_map_seq_show()
to void * __percpu * (void __percpu pointer to void pointer) and
cast the value from the generic address space to the __percpu
address space via uintptr_t [1].
In hashtab.c:
Assign the return value from bpf_mem_cache_alloc() to void pointer
and cast the value to void __percpu ** (void pointer to percpu void
pointer) before dereferencing.
In memalloc.c:
Explicitly declare __percpu variables.
Cast obj to void __percpu **.
In helpers.c:
Cast ptr in BPF_CALL_1 and BPF_CALL_2 from generic address space
to __percpu address space via const uintptr_t [1].
Found by GCC's named address space checks.
There were no changes in the resulting object files.
[1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yonghong.song@linux.dev>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
---
kernel/bpf/arraymap.c | 8 ++++----
kernel/bpf/hashtab.c | 8 ++++----
kernel/bpf/helpers.c | 4 ++--
kernel/bpf/memalloc.c | 12 ++++++------
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 188e3c2effb2..544ca433275e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
- return array->pptrs[index];
+ return array->ptrs[index];
return array_map_elem_ptr(array, index);
}
@@ -619,7 +619,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
- return array->pptrs[index];
+ return array->ptrs[index];
return array_map_elem_ptr(array, index);
}
@@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
- void __percpu **pptr;
+ void * __percpu *pptr;
u32 size;
meta.seq = seq;
@@ -648,7 +648,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
if (!info->percpu_value_buf) {
ctx.value = v;
} else {
- pptr = v;
+ pptr = (void __percpu *)(uintptr_t)v;
size = array->elem_size;
for_each_possible_cpu(cpu) {
copy_map_value_long(map, info->percpu_value_buf + off,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index be1f64c20125..a49212bbda09 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = htab_elem_get_ptr(l_new, key_size);
} else {
/* alloc_percpu zero-fills */
- pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
- if (!pptr) {
+ void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
+ if (!ptr) {
bpf_mem_cache_free(&htab->ma, l_new);
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
- l_new->ptr_to_pptr = pptr;
- pptr = *(void **)pptr;
+ l_new->ptr_to_pptr = ptr;
+ pptr = *(void __percpu **)ptr;
}
pcpu_init_value(htab, pptr, value, onallcpus);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d02ae323996b..dd7529153146 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -715,7 +715,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
if (cpu >= nr_cpu_ids)
return (unsigned long)NULL;
- return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+ return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu);
}
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
@@ -728,7 +728,7 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
- return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+ return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr);
}
const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index dec892ded031..b3858a76e0b3 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
{
if (c->percpu_size) {
- void **obj = kmalloc_node(c->percpu_size, flags, node);
- void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
+ void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
+ void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
if (!obj || !pptr) {
free_percpu(pptr);
@@ -253,7 +253,7 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic)
static void free_one(void *obj, bool percpu)
{
if (percpu) {
- free_percpu(((void **)obj)[1]);
+ free_percpu(((void __percpu **)obj)[1]);
kfree(obj);
return;
}
@@ -509,8 +509,8 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
*/
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
- struct bpf_mem_caches *cc, __percpu *pcc;
- struct bpf_mem_cache *c, __percpu *pc;
+ struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
+ struct bpf_mem_cache *c; struct bpf_mem_cache __percpu *pc;
struct obj_cgroup *objcg = NULL;
int cpu, i, unit_size, percpu_size = 0;
@@ -591,7 +591,7 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size)
{
- struct bpf_mem_caches *cc, __percpu *pcc;
+ struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
int cpu, i, unit_size, percpu_size;
struct obj_cgroup *objcg;
struct bpf_mem_cache *c;
--
2.42.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH] bpf: Fix percpu address space issues
2024-08-04 18:55 [PATCH] bpf: Fix percpu address space issues Uros Bizjak
@ 2024-08-09 8:28 ` Eduard Zingerman
2024-08-09 10:15 ` Uros Bizjak
0 siblings, 1 reply; 5+ messages in thread
From: Eduard Zingerman @ 2024-08-09 8:28 UTC (permalink / raw)
To: Uros Bizjak, bpf; +Cc: Alexei Starovoitov
On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
[...]
> Found by GCC's named address space checks.
Please provide some additional details.
I assume that the definition of __percpu was changed from
__attribute__((btf_type_tag(percpu))) to
__attribute__((address_space(??)), is that correct?
What is the motivation for this patch?
Currently __percpu is defined as a type tag and is used only by BPF verifier,
where it seems to be relevant only for structure fields and function parameters.
This patch only changes local variables.
> There were no changes in the resulting object files.
>
> [1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Andrii Nakryiko <andrii@kernel.org>
> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> Cc: Eduard Zingerman <eddyz87@gmail.com>
> Cc: Song Liu <song@kernel.org>
> Cc: Yonghong Song <yonghong.song@linux.dev>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: KP Singh <kpsingh@kernel.org>
> Cc: Stanislav Fomichev <sdf@fomichev.me>
> Cc: Hao Luo <haoluo@google.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> ---
> kernel/bpf/arraymap.c | 8 ++++----
> kernel/bpf/hashtab.c | 8 ++++----
> kernel/bpf/helpers.c | 4 ++--
> kernel/bpf/memalloc.c | 12 ++++++------
> 4 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 188e3c2effb2..544ca433275e 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> array = container_of(map, struct bpf_array, map);
> index = info->index & array->index_mask;
> if (info->percpu_value_buf)
> - return array->pptrs[index];
> + return array->ptrs[index];
I disagree with this change.
One might say that indeed the address space is cast away here,
however, value returned by this function is only used in functions
bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
is necessary.
> return array_map_elem_ptr(array, index);
> }
>
> @@ -619,7 +619,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> array = container_of(map, struct bpf_array, map);
> index = info->index & array->index_mask;
> if (info->percpu_value_buf)
> - return array->pptrs[index];
> + return array->ptrs[index];
Same as above.
> return array_map_elem_ptr(array, index);
> }
>
> @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> struct bpf_iter_meta meta;
> struct bpf_prog *prog;
> int off = 0, cpu = 0;
> - void __percpu **pptr;
> + void * __percpu *pptr;
Should this be 'void __percpu *pptr;?
The value comes from array->pptrs[*] field,
which has the above type for elements.
> u32 size;
>
> meta.seq = seq;
> @@ -648,7 +648,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> if (!info->percpu_value_buf) {
> ctx.value = v;
> } else {
> - pptr = v;
> + pptr = (void __percpu *)(uintptr_t)v;
> size = array->elem_size;
> for_each_possible_cpu(cpu) {
> copy_map_value_long(map, info->percpu_value_buf + off,
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index be1f64c20125..a49212bbda09 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> pptr = htab_elem_get_ptr(l_new, key_size);
> } else {
> /* alloc_percpu zero-fills */
> - pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> - if (!pptr) {
> + void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> + if (!ptr) {
Why adding an intermediate variable here?
Is casting bpf_mem_cache_alloc() result to percpu not sufficient?
It looks like bpf_mem_cache_alloc() returns a percpu pointer,
should it be declared as such?
> bpf_mem_cache_free(&htab->ma, l_new);
> l_new = ERR_PTR(-ENOMEM);
> goto dec_count;
> }
> - l_new->ptr_to_pptr = pptr;
> - pptr = *(void **)pptr;
> + l_new->ptr_to_pptr = ptr;
> + pptr = *(void __percpu **)ptr;
> }
>
> pcpu_init_value(htab, pptr, value, onallcpus);
[...]
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index dec892ded031..b3858a76e0b3 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
> static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
> {
> if (c->percpu_size) {
> - void **obj = kmalloc_node(c->percpu_size, flags, node);
> - void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
> + void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
Why __percpu is needed for obj?
kmalloc_node is defined as 'alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))',
alloc_hooks(X) is a macro and it produces result of type typeof(X),
kmalloc_node_noprof() returns void*, not __percpu void*.
Do I miss something?
> + void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
>
> if (!obj || !pptr) {
> free_percpu(pptr);
[...]
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] bpf: Fix percpu address space issues
2024-08-09 8:28 ` Eduard Zingerman
@ 2024-08-09 10:15 ` Uros Bizjak
2024-08-09 20:29 ` Eduard Zingerman
0 siblings, 1 reply; 5+ messages in thread
From: Uros Bizjak @ 2024-08-09 10:15 UTC (permalink / raw)
To: Eduard Zingerman; +Cc: bpf, Alexei Starovoitov
[-- Attachment #1: Type: text/plain, Size: 9209 bytes --]
On Fri, Aug 9, 2024 at 10:28 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
>
> [...]
>
> > Found by GCC's named address space checks.
>
> Please provide some additional details.
> I assume that the definition of __percpu was changed from
> __attribute__((btf_type_tag(percpu))) to
> __attribute__((address_space(??)), is that correct?
This is correct. The fixes in the patch are based on the patch series
[1] that enable strict percpu check via GCC's x86 named address space
qualifiers, and in its RFC state hacks __seg_gs into the __percpu
qualifier (as can be seen in the 3/3 patch). The compiler will detect
pointer address space mismatches for e.g.:
--cut here--
int __seg_gs m;
int *foo (void) { return &m; }
--cut here--
v.c: In function ‘foo’:
v.c:5:26: error: return from pointer to non-enclosed address space
5 | int *foo (void) { return &m; }
| ^~
v.c:5:26: note: expected ‘int *’ but pointer is of type ‘__seg_gs int *’
and expects explicit casts via uintptr_t when these casts are really
intended ([2], please also see [3] for similar sparse requirement):
int *foo (void) { return (int *)(uintptr_t)&m; }
[1] https://lore.kernel.org/lkml/20240805184012.358023-1-ubizjak@gmail.com/
[2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
[3] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
For reference, I have attached a test source file with more complex
checks that also includes linux percpu verifier macro.
> What is the motivation for this patch?
With the percpu checker patch applied, the build will break with the
above error due to mismatched address space pointers.
> Currently __percpu is defined as a type tag and is used only by BPF verifier,
> where it seems to be relevant only for structure fields and function parameters.
> This patch only changes local variables.
__percpu is also used for sparse checks (Use C=1 CHECK="sparse
-Wcast-from-as"). Sparse also reports address space violations, but
its warnings are not fatal and are not as precise as the compiler. So,
if you try to compile the bpf source when the kernel source is patched
with the percpu checker, the compiler will report several errors that
my bpf patch tries to fix.
> > There were no changes in the resulting object files.
> >
> > [1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
> >
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Andrii Nakryiko <andrii@kernel.org>
> > Cc: Martin KaFai Lau <martin.lau@linux.dev>
> > Cc: Eduard Zingerman <eddyz87@gmail.com>
> > Cc: Song Liu <song@kernel.org>
> > Cc: Yonghong Song <yonghong.song@linux.dev>
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: KP Singh <kpsingh@kernel.org>
> > Cc: Stanislav Fomichev <sdf@fomichev.me>
> > Cc: Hao Luo <haoluo@google.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > ---
> > kernel/bpf/arraymap.c | 8 ++++----
> > kernel/bpf/hashtab.c | 8 ++++----
> > kernel/bpf/helpers.c | 4 ++--
> > kernel/bpf/memalloc.c | 12 ++++++------
> > 4 files changed, 16 insertions(+), 16 deletions(-)
> >
> > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > index 188e3c2effb2..544ca433275e 100644
> > --- a/kernel/bpf/arraymap.c
> > +++ b/kernel/bpf/arraymap.c
> > @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> > array = container_of(map, struct bpf_array, map);
> > index = info->index & array->index_mask;
> > if (info->percpu_value_buf)
> > - return array->pptrs[index];
> > + return array->ptrs[index];
>
> I disagree with this change.
> One might say that indeed the address space is cast away here,
> however, value returned by this function is only used in functions
> bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
> 'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
> is necessary.
If this is the case, you have to inform the compiler that address
space is cast away with explicit (void *)(uintptr_t) cast, placed
before return. But looking at the union with ptrs and pptrs members,
it looked to me that it is just the case of wrong union member
accessed.
> > return array_map_elem_ptr(array, index);
> > }
> >
> > @@ -619,7 +619,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> > array = container_of(map, struct bpf_array, map);
> > index = info->index & array->index_mask;
> > if (info->percpu_value_buf)
> > - return array->pptrs[index];
> > + return array->ptrs[index];
>
> Same as above.
>
> > return array_map_elem_ptr(array, index);
> > }
> >
> > @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> > struct bpf_iter_meta meta;
> > struct bpf_prog *prog;
> > int off = 0, cpu = 0;
> > - void __percpu **pptr;
> > + void * __percpu *pptr;
>
> Should this be 'void __percpu *pptr;?
> The value comes from array->pptrs[*] field,
> which has the above type for elements.
I didn't want to introduce semantic changes, so I have just changed
the base type fo __percpu one, due to:
per_cpu_ptr(pptr, cpu));
later in the code.
>
> > u32 size;
> >
> > meta.seq = seq;
> > @@ -648,7 +648,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> > if (!info->percpu_value_buf) {
> > ctx.value = v;
> > } else {
> > - pptr = v;
> > + pptr = (void __percpu *)(uintptr_t)v;
> > size = array->elem_size;
> > for_each_possible_cpu(cpu) {
> > copy_map_value_long(map, info->percpu_value_buf + off,
> > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > index be1f64c20125..a49212bbda09 100644
> > --- a/kernel/bpf/hashtab.c
> > +++ b/kernel/bpf/hashtab.c
> > @@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> > pptr = htab_elem_get_ptr(l_new, key_size);
> > } else {
> > /* alloc_percpu zero-fills */
> > - pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > - if (!pptr) {
> > + void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > + if (!ptr) {
>
> Why adding an intermediate variable here?
Mainly to avoid several inter-as casts, because l_new->ptr_to_pptr
also expects assignment from generic address space.
> Is casting bpf_mem_cache_alloc() result to percpu not sufficient?
> It looks like bpf_mem_cache_alloc() returns a percpu pointer,
> should it be declared as such?
This function is also used a couple of lines above changed hunk:
l_new = bpf_mem_cache_alloc(&htab->ma);
where l_new is declared in generic address space.
>
> > bpf_mem_cache_free(&htab->ma, l_new);
> > l_new = ERR_PTR(-ENOMEM);
> > goto dec_count;
> > }
> > - l_new->ptr_to_pptr = pptr;
> > - pptr = *(void **)pptr;
> > + l_new->ptr_to_pptr = ptr;
> > + pptr = *(void __percpu **)ptr;
> > }
> >
> > pcpu_init_value(htab, pptr, value, onallcpus);
>
> [...]
>
> > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> > index dec892ded031..b3858a76e0b3 100644
> > --- a/kernel/bpf/memalloc.c
> > +++ b/kernel/bpf/memalloc.c
> > @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
> > static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
> > {
> > if (c->percpu_size) {
> > - void **obj = kmalloc_node(c->percpu_size, flags, node);
> > - void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
> > + void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
>
> Why __percpu is needed for obj?
The new declaration declares "void pointer to percpu pointer", it is
needed because some lines below we have:
obj[1] = pptr;
and pptr needs to be declared in __percpu named address space due to:
free_percpu(pptr);
> kmalloc_node is defined as 'alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))',
> alloc_hooks(X) is a macro and it produces result of type typeof(X),
> kmalloc_node_noprof() returns void*, not __percpu void*.
> Do I miss something?
Yes, as explained above, the declaration:
void __percpu **obj
somehow unintuitively declares void pointer to percpu pointer, so the
types match (otherwise, the compiler would complain ;) ).
Thanks,
Uros.
[-- Attachment #2: named-as.c --]
[-- Type: text/x-c-code, Size: 1086 bytes --]
#define NULL 0
#define __verify_pcpu_ptr(ptr) \
do { \
const void __seg_gs *__vpp_verify = (typeof((ptr) + 0))NULL; \
(void)__vpp_verify; \
} while (0)
void __seg_gs **__pptr; // void pointer na void __seg_gs pointer
void *foo1 (void *v)
{
void __seg_gs **pptr = v;
__verify_pcpu_ptr (*pptr);
return pptr;
}
void __seg_gs *foo2 (void *v)
{
void __seg_gs **pptr = v;
__verify_pcpu_ptr (*pptr);
return *pptr;
}
void * __seg_gs *__ptr; // __seg_gs void pointer to void pointer
void __seg_gs *bar1 (void __seg_gs *v)
{
void * __seg_gs *ptr = v;
__verify_pcpu_ptr (ptr);
return ptr;
}
void *bar2 (void __seg_gs *v)
{
void * __seg_gs *ptr = v;
__verify_pcpu_ptr (ptr);
return *ptr;
}
void __seg_gs *qux (void *ptr)
{
return *(void __seg_gs **)ptr;
}
void __seg_gs *quux (void *ptr)
{
return ((void __seg_gs **)ptr)[1];
}
void __seg_gs *quuux (void __seg_gs **ptr)
{
return *ptr;
}
void __seg_gs *test (void *v)
{
void __seg_gs **pptr = v;
return *pptr;
}
void *test_ (void *v)
{
void __seg_gs **pptr = v;
return pptr;
}
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] bpf: Fix percpu address space issues
2024-08-09 10:15 ` Uros Bizjak
@ 2024-08-09 20:29 ` Eduard Zingerman
2024-08-10 8:35 ` Uros Bizjak
0 siblings, 1 reply; 5+ messages in thread
From: Eduard Zingerman @ 2024-08-09 20:29 UTC (permalink / raw)
To: Uros Bizjak; +Cc: bpf, Alexei Starovoitov
On Fri, 2024-08-09 at 12:15 +0200, Uros Bizjak wrote:
> On Fri, Aug 9, 2024 at 10:28 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> >
> > On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
> >
> > [...]
> >
> > > Found by GCC's named address space checks.
> >
> > Please provide some additional details.
> > I assume that the definition of __percpu was changed from
> > __attribute__((btf_type_tag(percpu))) to
> > __attribute__((address_space(??)), is that correct?
>
> This is correct. The fixes in the patch are based on the patch series
> [1] that enable strict percpu check via GCC's x86 named address space
> qualifiers, and in its RFC state hacks __seg_gs into the __percpu
> qualifier (as can be seen in the 3/3 patch). The compiler will detect
> pointer address space mismatches for e.g.:
>
> --cut here--
> int __seg_gs m;
>
> int *foo (void) { return &m; }
> --cut here--
>
> v.c: In function ‘foo’:
> v.c:5:26: error: return from pointer to non-enclosed address space
> 5 | int *foo (void) { return &m; }
> | ^~
> v.c:5:26: note: expected ‘int *’ but pointer is of type ‘__seg_gs int *’
>
> and expects explicit casts via uintptr_t when these casts are really
> intended ([2], please also see [3] for similar sparse requirement):
>
> int *foo (void) { return (int *)(uintptr_t)&m; }
>
> [1] https://lore.kernel.org/lkml/20240805184012.358023-1-ubizjak@gmail.com/
> [2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
> [3] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
Understood, thank you for the details.
Interestingly, clang does not require (uintptr_t) intermediate cast, e.g.:
$ cat test.c
#define __as(N) __attribute__((address_space(N)))
void *foo(void __as(1)* x) { return x; } // error
void *bar(void __as(1)* x) { return (void *)x; } // fine
$ clang -o /dev/null -c test.c
test.c:3:37: error: returning '__as(1) void *' from a function with result type 'void *' changes address space of pointer
3 | void *foo(void __as(1)* x) { return x; } // error
| ^
1 error generated.
[...]
> > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > index 188e3c2effb2..544ca433275e 100644
> > > --- a/kernel/bpf/arraymap.c
> > > +++ b/kernel/bpf/arraymap.c
> > > @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> > > array = container_of(map, struct bpf_array, map);
> > > index = info->index & array->index_mask;
> > > if (info->percpu_value_buf)
> > > - return array->pptrs[index];
> > > + return array->ptrs[index];
> >
> > I disagree with this change.
> > One might say that indeed the address space is cast away here,
> > however, value returned by this function is only used in functions
> > bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
> > 'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
> > is necessary.
>
> If this is the case, you have to inform the compiler that address
> space is cast away with explicit (void *)(uintptr_t) cast, placed
> before return. But looking at the union with ptrs and pptrs members,
> it looked to me that it is just the case of wrong union member
> accessed.
I'd say it's better to use pptr and add a cast in this case.
[...]
> > > @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> > > struct bpf_iter_meta meta;
> > > struct bpf_prog *prog;
> > > int off = 0, cpu = 0;
> > > - void __percpu **pptr;
> > > + void * __percpu *pptr;
> >
> > Should this be 'void __percpu *pptr;?
> > The value comes from array->pptrs[*] field,
> > which has the above type for elements.
>
> I didn't want to introduce semantic changes, so I have just changed
> the base type fo __percpu one, due to:
>
> per_cpu_ptr(pptr, cpu));
>
> later in the code.
There would be no semantic changes if type of pptr is changed to 'void __percpu *'.
[...]
> > > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > > index be1f64c20125..a49212bbda09 100644
> > > --- a/kernel/bpf/hashtab.c
> > > +++ b/kernel/bpf/hashtab.c
> > > @@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> > > pptr = htab_elem_get_ptr(l_new, key_size);
> > > } else {
> > > /* alloc_percpu zero-fills */
> > > - pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > > - if (!pptr) {
> > > + void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > > + if (!ptr) {
> >
> > Why adding an intermediate variable here?
>
> Mainly to avoid several inter-as casts, because l_new->ptr_to_pptr
> also expects assignment from generic address space.
Ok, makes sense.
[...]
> > > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> > > index dec892ded031..b3858a76e0b3 100644
> > > --- a/kernel/bpf/memalloc.c
> > > +++ b/kernel/bpf/memalloc.c
> > > @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
> > > static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
> > > {
> > > if (c->percpu_size) {
> > > - void **obj = kmalloc_node(c->percpu_size, flags, node);
> > > - void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
> > > + void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
> >
> > Why __percpu is needed for obj?
>
> The new declaration declares "void pointer to percpu pointer", it is
> needed because some lines below we have:
>
> obj[1] = pptr;
Oh, right.
[...]
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] bpf: Fix percpu address space issues
2024-08-09 20:29 ` Eduard Zingerman
@ 2024-08-10 8:35 ` Uros Bizjak
0 siblings, 0 replies; 5+ messages in thread
From: Uros Bizjak @ 2024-08-10 8:35 UTC (permalink / raw)
To: Eduard Zingerman; +Cc: bpf, Alexei Starovoitov
On Fri, Aug 9, 2024 at 10:29 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2024-08-09 at 12:15 +0200, Uros Bizjak wrote:
> > On Fri, Aug 9, 2024 at 10:28 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
> > >
> > > [...]
> > >
> > > > Found by GCC's named address space checks.
> > >
> > > Please provide some additional details.
> > > I assume that the definition of __percpu was changed from
> > > __attribute__((btf_type_tag(percpu))) to
> > > __attribute__((address_space(??)), is that correct?
> >
> > This is correct. The fixes in the patch are based on the patch series
> > [1] that enable strict percpu check via GCC's x86 named address space
> > qualifiers, and in its RFC state hacks __seg_gs into the __percpu
> > qualifier (as can be seen in the 3/3 patch). The compiler will detect
> > pointer address space mismatches for e.g.:
> >
> > --cut here--
> > int __seg_gs m;
> >
> > int *foo (void) { return &m; }
> > --cut here--
> >
> > v.c: In function ‘foo’:
> > v.c:5:26: error: return from pointer to non-enclosed address space
> > 5 | int *foo (void) { return &m; }
> > | ^~
> > v.c:5:26: note: expected ‘int *’ but pointer is of type ‘__seg_gs int *’
> >
> > and expects explicit casts via uintptr_t when these casts are really
> > intended ([2], please also see [3] for similar sparse requirement):
> >
> > int *foo (void) { return (int *)(uintptr_t)&m; }
> >
> > [1] https://lore.kernel.org/lkml/20240805184012.358023-1-ubizjak@gmail.com/
> > [2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
> > [3] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
>
> Understood, thank you for the details.
> Interestingly, clang does not require (uintptr_t) intermediate cast, e.g.:
>
> $ cat test.c
> #define __as(N) __attribute__((address_space(N)))
>
> void *foo(void __as(1)* x) { return x; } // error
> void *bar(void __as(1)* x) { return (void *)x; } // fine
>
> $ clang -o /dev/null -c test.c
> test.c:3:37: error: returning '__as(1) void *' from a function with result type 'void *' changes address space of pointer
> 3 | void *foo(void __as(1)* x) { return x; } // error
> | ^
> 1 error generated.
This is probably a deficiency in clang, sparse nicely explains the
reason for intermediate cast:
-q-
Sparse treats pointers with different address spaces as distinct types
and will warn on casts (implicit or explicit) mixing the address
spaces. An exception to this is when the destination type is uintptr_t
or unsigned long since the resulting integer value is independent of
the address space and can’t be dereferenced without first casting it
back to a pointer type.
-/q-
>
>
> [...]
>
> > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > index 188e3c2effb2..544ca433275e 100644
> > > > --- a/kernel/bpf/arraymap.c
> > > > +++ b/kernel/bpf/arraymap.c
> > > > @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> > > > array = container_of(map, struct bpf_array, map);
> > > > index = info->index & array->index_mask;
> > > > if (info->percpu_value_buf)
> > > > - return array->pptrs[index];
> > > > + return array->ptrs[index];
> > >
> > > I disagree with this change.
> > > One might say that indeed the address space is cast away here,
> > > however, value returned by this function is only used in functions
> > > bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
> > > 'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
> > > is necessary.
> >
> > If this is the case, you have to inform the compiler that address
> > space is cast away with explicit (void *)(uintptr_t) cast, placed
> > before return. But looking at the union with ptrs and pptrs members,
> > it looked to me that it is just the case of wrong union member
> > accessed.
>
> I'd say it's better to use pptr and add a cast in this case.
OK, will do in a v2 patch, together with your other suggestion.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-08-10 8:36 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-04 18:55 [PATCH] bpf: Fix percpu address space issues Uros Bizjak
2024-08-09 8:28 ` Eduard Zingerman
2024-08-09 10:15 ` Uros Bizjak
2024-08-09 20:29 ` Eduard Zingerman
2024-08-10 8:35 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox