* [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline
@ 2024-11-11 12:16 Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 1/2] bpf: Use function pointers count as struct_ops links count Xu Kuohai
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Xu Kuohai @ 2024-11-11 12:16 UTC (permalink / raw)
To: bpf, netdev
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Yonghong Song, Kui-Feng Lee
From: Xu Kuohai <xukuohai@huawei.com>
Add kernel symbol for struct_ops trampoline.
Without a kernel symbol for a struct_ops trampoline, the unwinder may
produce unexpected stacktraces. For example, the x86 ORC and FP
unwinders stop the stacktrace at a struct_ops trampoline address since
there is no kernel symbol for that address.
v3:
- Add a separate cleanup patch to replace links_cnt with funcs_cnt
- Allocate ksyms on-demand in update_elem() to stay with the links
allocation way
- Set ksym name to bpf__<struct_ops_name>_<member_name>
v2: https://lore.kernel.org/bpf/20241101111948.1570547-1-xukuohai@huaweicloud.com/
- Refine the commit message for clarity and fix a test bot warning
v1: https://lore.kernel.org/bpf/20241030111533.907289-1-xukuohai@huaweicloud.com/
Xu Kuohai (2):
bpf: Use function pointers count as struct_ops links count
bpf: Add kernel symbol for struct_ops trampoline
include/linux/bpf.h | 3 +-
kernel/bpf/bpf_struct_ops.c | 114 ++++++++++++++++++++++++++++++++----
kernel/bpf/dispatcher.c | 3 +-
kernel/bpf/trampoline.c | 9 ++-
4 files changed, 114 insertions(+), 15 deletions(-)
--
2.39.5
* [PATCH bpf-next v3 1/2] bpf: Use function pointers count as struct_ops links count
2024-11-11 12:16 [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline Xu Kuohai
@ 2024-11-11 12:16 ` Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 12:54 ` [PATCH bpf-next v3 0/2] " Xu Kuohai
2 siblings, 0 replies; 6+ messages in thread
From: Xu Kuohai @ 2024-11-11 12:16 UTC (permalink / raw)
To: bpf, netdev
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Yonghong Song, Kui-Feng Lee
From: Xu Kuohai <xukuohai@huawei.com>
Only function pointers in a struct_ops structure can be linked to bpf
progs, so set the links count to the number of function pointers,
instead of the total member count of the structure.
Suggested-by: Martin KaFai Lau <martin.lau@linux.dev>
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
kernel/bpf/bpf_struct_ops.c | 33 ++++++++++++++++++++++++---------
1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index fda3dd2ee984..e99fce81e916 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -32,7 +32,7 @@ struct bpf_struct_ops_map {
* (in kvalue.data).
*/
struct bpf_link **links;
- u32 links_cnt;
+ u32 funcs_cnt;
u32 image_pages_cnt;
/* image_pages is an array of pages that has all the trampolines
* that stores the func args before calling the bpf_prog.
@@ -481,11 +481,11 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
u32 i;
- for (i = 0; i < st_map->links_cnt; i++) {
- if (st_map->links[i]) {
- bpf_link_put(st_map->links[i]);
- st_map->links[i] = NULL;
- }
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->links[i])
+ break;
+ bpf_link_put(st_map->links[i]);
+ st_map->links[i] = NULL;
}
}
@@ -601,6 +601,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
int prog_fd, err;
u32 i, trampoline_start, image_off = 0;
void *cur_image = NULL, *image = NULL;
+ struct bpf_link **plink;
if (flags)
return -EINVAL;
@@ -639,6 +640,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
udata = &uvalue->data;
kdata = &kvalue->data;
+ plink = st_map->links;
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
for_each_member(i, t, member) {
const struct btf_type *mtype, *ptype;
@@ -714,7 +716,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
}
bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
&bpf_struct_ops_link_lops, prog);
- st_map->links[i] = &link->link;
+ *plink++ = &link->link;
trampoline_start = image_off;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
@@ -895,6 +897,19 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
return 0;
}
+static u32 count_func_ptrs(const struct btf *btf, const struct btf_type *t)
+{
+ int i;
+ u32 count;
+ const struct btf_member *member;
+
+ count = 0;
+ for_each_member(i, t, member)
+ if (btf_type_resolve_func_ptr(btf, member->type, NULL))
+ count++;
+ return count;
+}
+
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
const struct bpf_struct_ops_desc *st_ops_desc;
@@ -961,9 +976,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
map = &st_map->map;
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
- st_map->links_cnt = btf_type_vlen(t);
+ st_map->funcs_cnt = count_func_ptrs(btf, t);
st_map->links =
- bpf_map_area_alloc(st_map->links_cnt * sizeof(struct bpf_links *),
+ bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
NUMA_NO_NODE);
if (!st_map->uvalue || !st_map->links) {
ret = -ENOMEM;
--
2.39.5
* [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline
2024-11-11 12:16 [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 1/2] bpf: Use function pointers count as struct_ops links count Xu Kuohai
@ 2024-11-11 12:16 ` Xu Kuohai
2024-11-11 23:04 ` Martin KaFai Lau
2024-11-11 12:54 ` [PATCH bpf-next v3 0/2] " Xu Kuohai
2 siblings, 1 reply; 6+ messages in thread
From: Xu Kuohai @ 2024-11-11 12:16 UTC (permalink / raw)
To: bpf, netdev
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Yonghong Song, Kui-Feng Lee
From: Xu Kuohai <xukuohai@huawei.com>
Without kernel symbols for struct_ops trampolines, the unwinder may
produce unexpected stacktraces.
For example, the x86 ORC and FP unwinders check if an IP is in kernel
text by verifying the presence of the IP's kernel symbol. When a
struct_ops trampoline address is encountered, the unwinder stops due
to the absence of a symbol, resulting in an incomplete stacktrace that
consists only of direct and indirect child functions called from the
trampoline.
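To illustrate the x86 case, the kind of in-kernel-text check these
unwinders depend on boils down to roughly the following (a simplified
sketch, not the exact kernel code; see kernel_text_address() and
is_bpf_text_address()):

  /* Simplified sketch: an address inside a BPF image is only treated
   * as kernel text if a bpf_ksym covering it has been registered.
   * Struct_ops trampolines register no such ksym today, so the check
   * fails for them.
   */
  static bool addr_is_kernel_text(unsigned long addr)
  {
          if (core_kernel_text(addr) || is_module_text_address(addr))
                  return true;
          /* looks up the registered bpf_ksym ranges */
          return is_bpf_text_address(addr);
  }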
The arm64 unwinder is another example. While the arm64 unwinder can
proceed across a struct_ops trampoline address, the corresponding
symbol name is displayed as "unknown", which is confusing.
Thus, add a kernel symbol for each struct_ops trampoline. The name is
bpf__<struct_ops_name>_<member_name>, where <struct_ops_name> is the
type name of the struct_ops, and <member_name> is the name of
the member that the trampoline is linked to. For example, the trampoline
for the cong_avoid member of tcp_congestion_ops gets the symbol
bpf__tcp_congestion_ops_cong_avoid.
Below is a comparison of stacktraces captured on x86 by perf record,
before and after this patch.
Before:
ffffffff8116545d __lock_acquire+0xad ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff813088f4 __bpf_prog_enter+0x34 ([kernel.kallsyms])
After:
ffffffff811656bd __lock_acquire+0x30d ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff81309024 __bpf_prog_enter+0x34 ([kernel.kallsyms])
ffffffffc000d7e9 bpf__tcp_congestion_ops_cong_avoid+0x3e ([kernel.kallsyms])
ffffffff81f250a5 tcp_ack+0x10d5 ([kernel.kallsyms])
ffffffff81f27c66 tcp_rcv_established+0x3b6 ([kernel.kallsyms])
ffffffff81f3ad03 tcp_v4_do_rcv+0x193 ([kernel.kallsyms])
ffffffff81d65a18 __release_sock+0xd8 ([kernel.kallsyms])
ffffffff81d65af4 release_sock+0x34 ([kernel.kallsyms])
ffffffff81f15c4b tcp_sendmsg+0x3b ([kernel.kallsyms])
ffffffff81f663d7 inet_sendmsg+0x47 ([kernel.kallsyms])
ffffffff81d5ab40 sock_write_iter+0x160 ([kernel.kallsyms])
ffffffff8149c67b vfs_write+0x3fb ([kernel.kallsyms])
ffffffff8149caf6 ksys_write+0xc6 ([kernel.kallsyms])
ffffffff8149cb5d __x64_sys_write+0x1d ([kernel.kallsyms])
ffffffff81009200 x64_sys_call+0x1d30 ([kernel.kallsyms])
ffffffff82232d28 do_syscall_64+0x68 ([kernel.kallsyms])
ffffffff8240012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
Note that while adding the new ksyms member to struct bpf_struct_ops_map,
this patch also removes the unused rcu member from the structure.
Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
---
include/linux/bpf.h | 3 +-
kernel/bpf/bpf_struct_ops.c | 81 ++++++++++++++++++++++++++++++++++++-
kernel/bpf/dispatcher.c | 3 +-
kernel/bpf/trampoline.c | 9 ++++-
4 files changed, 90 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1b84613b10ac..6fc6398d86c6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1402,7 +1402,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
-void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_add(struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index e99fce81e916..d6dd56fc80d8 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
struct bpf_struct_ops_map {
struct bpf_map map;
- struct rcu_head rcu;
const struct bpf_struct_ops_desc *st_ops_desc;
/* protect map_update */
struct mutex lock;
@@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
* (in kvalue.data).
*/
struct bpf_link **links;
+ /* ksyms for bpf trampolines */
+ struct bpf_ksym **ksyms;
u32 funcs_cnt;
u32 image_pages_cnt;
/* image_pages is an array of pages that has all the trampolines
@@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
return 0;
}
+static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
+ void *image, unsigned int size,
+ struct bpf_ksym *ksym)
+{
+ snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
+ INIT_LIST_HEAD_RCU(&ksym->lnode);
+ bpf_image_ksym_init(image, size, ksym);
+}
+
+static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->ksyms[i])
+ break;
+ bpf_image_ksym_add(st_map->ksyms[i]);
+ }
+}
+
+static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->ksyms[i])
+ break;
+ bpf_image_ksym_del(st_map->ksyms[i]);
+ }
+}
+
+static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->ksyms[i])
+ break;
+ kfree(st_map->ksyms[i]);
+ st_map->links[i] = NULL;
+ }
+}
+
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
@@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
u32 i, trampoline_start, image_off = 0;
void *cur_image = NULL, *image = NULL;
struct bpf_link **plink;
+ struct bpf_ksym **pksym;
+ const char *tname, *mname;
if (flags)
return -EINVAL;
@@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
kdata = &kvalue->data;
plink = st_map->links;
+ pksym = st_map->ksyms;
+ tname = btf_name_by_offset(st_map->btf, t->name_off);
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
for_each_member(i, t, member) {
const struct btf_type *mtype, *ptype;
struct bpf_prog *prog;
struct bpf_tramp_link *link;
+ struct bpf_ksym *ksym;
u32 moff;
moff = __btf_member_bit_offset(t, member) / 8;
+ mname = btf_name_by_offset(st_map->btf, member->name_off);
ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
if (ptype == module_type) {
if (*(void **)(udata + moff))
@@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
&bpf_struct_ops_link_lops, prog);
*plink++ = &link->link;
+ ksym = kzalloc(sizeof(*ksym), GFP_USER);
+ if (!ksym) {
+ bpf_prog_put(prog);
+ err = -ENOMEM;
+ goto reset_unlock;
+ }
+ *pksym = ksym;
+
trampoline_start = image_off;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[i],
@@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
/* put prog_id to udata */
*(unsigned long *)(udata + moff) = prog->aux->id;
+
+ /* init ksym for this trampoline */
+ bpf_struct_ops_ksym_init(tname, mname,
+ image + trampoline_start,
+ image_off - trampoline_start,
+ *pksym++);
}
if (st_ops->validate) {
@@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
*/
reset_unlock:
+ bpf_struct_ops_map_free_ksyms(st_map);
bpf_struct_ops_map_free_image(st_map);
bpf_struct_ops_map_put_progs(st_map);
memset(uvalue, 0, map->value_size);
@@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
unlock:
kfree(tlinks);
mutex_unlock(&st_map->lock);
+ if (!err)
+ bpf_struct_ops_map_ksyms_add(st_map);
return err;
}
@@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
if (st_map->links)
bpf_struct_ops_map_put_progs(st_map);
+ if (st_map->ksyms)
+ bpf_struct_ops_map_free_ksyms(st_map);
bpf_map_area_free(st_map->links);
+ bpf_map_area_free(st_map->ksyms);
bpf_struct_ops_map_free_image(st_map);
bpf_map_area_free(st_map->uvalue);
bpf_map_area_free(st_map);
@@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
if (btf_is_module(st_map->btf))
module_put(st_map->st_ops_desc->st_ops->owner);
+ if (st_map->ksyms)
+ bpf_struct_ops_map_del_ksyms(st_map);
+
/* The struct_ops's function may switch to another struct_ops.
*
* For example, bpf_tcp_cc_x->init() may switch to
@@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->links =
bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
NUMA_NO_NODE);
- if (!st_map->uvalue || !st_map->links) {
+
+ st_map->ksyms =
+ bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
+ NUMA_NO_NODE);
+ if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
ret = -ENOMEM;
goto errout_free;
}
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 70fb82bf1637..aad8a11cc7e5 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -154,7 +154,8 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
d->image = NULL;
goto out;
}
- bpf_image_ksym_add(d->image, PAGE_SIZE, &d->ksym);
+ bpf_image_ksym_init(d->image, PAGE_SIZE, &d->ksym);
+ bpf_image_ksym_add(d->image);
}
prev_num_progs = d->num_progs;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 9f36c049f4c2..c3efca44c8f7 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -115,10 +115,14 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
-void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
+void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
{
ksym->start = (unsigned long) data;
ksym->end = ksym->start + size;
+}
+
+void bpf_image_ksym_add(struct bpf_ksym *ksym)
+{
bpf_ksym_add(ksym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
PAGE_SIZE, false, ksym->name);
@@ -377,7 +381,8 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
ksym = &im->ksym;
INIT_LIST_HEAD_RCU(&ksym->lnode);
snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
- bpf_image_ksym_add(image, size, ksym);
+ bpf_image_ksym_init(image, size, ksym);
+ bpf_image_ksym_add(image);
return im;
out_free_image:
--
2.39.5
* Re: [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline
2024-11-11 12:16 [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 1/2] bpf: Use function pointers count as struct_ops links count Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline Xu Kuohai
@ 2024-11-11 12:54 ` Xu Kuohai
2 siblings, 0 replies; 6+ messages in thread
From: Xu Kuohai @ 2024-11-11 12:54 UTC (permalink / raw)
To: bpf, netdev
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Yonghong Song, Kui-Feng Lee
On 11/11/2024 8:16 PM, Xu Kuohai wrote:
> From: Xu Kuohai <xukuohai@huawei.com>
>
> Add kernel symbol for struct_ops trampoline.
>
> Without a kernel symbol for a struct_ops trampoline, the unwinder may
> produce unexpected stacktraces. For example, the x86 ORC and FP
> unwinders stop the stacktrace at a struct_ops trampoline address since
> there is no kernel symbol for that address.
>
> v3:
> - Add a separate cleanup patch to replace links_cnt with funcs_cnt
> - Allocate ksyms on-demand in update_elem() to stay with the links
> allocation way
> - Set ksym name to bpf__<struct_ops_name>_<member_name>
>
> v2: https://lore.kernel.org/bpf/20241101111948.1570547-1-xukuohai@huaweicloud.com/
> - Refine the commit message for clarity and fix a test bot warning
>
> v1: https://lore.kernel.org/bpf/20241030111533.907289-1-xukuohai@huaweicloud.com/
>
> Xu Kuohai (2):
> bpf: Use function pointers count as struct_ops links count
> bpf: Add kernel symbol for struct_ops trampoline
>
> include/linux/bpf.h | 3 +-
> kernel/bpf/bpf_struct_ops.c | 114 ++++++++++++++++++++++++++++++++----
> kernel/bpf/dispatcher.c | 3 +-
> kernel/bpf/trampoline.c | 9 ++-
> 4 files changed, 114 insertions(+), 15 deletions(-)
>
Oops, I messed up the code in v2; the argument for
bpf_image_ksym_add in v3 is not correct (the dispatcher and trampoline
call sites pass the image address instead of the ksym).
Sorry for the noise.
* Re: [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline
2024-11-11 12:16 ` [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline Xu Kuohai
@ 2024-11-11 23:04 ` Martin KaFai Lau
2024-11-12 12:06 ` Xu Kuohai
0 siblings, 1 reply; 6+ messages in thread
From: Martin KaFai Lau @ 2024-11-11 23:04 UTC (permalink / raw)
To: Xu Kuohai
Cc: bpf, netdev, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, Yonghong Song, Kui-Feng Lee
On 11/11/24 4:16 AM, Xu Kuohai wrote:
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index e99fce81e916..d6dd56fc80d8 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>
> struct bpf_struct_ops_map {
> struct bpf_map map;
> - struct rcu_head rcu;
Since it needs a respin (more on it later), it will be useful to split this
cleanup out as a separate patch in the same patch series.
> const struct bpf_struct_ops_desc *st_ops_desc;
> /* protect map_update */
> struct mutex lock;
> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
> * (in kvalue.data).
> */
> struct bpf_link **links;
> + /* ksyms for bpf trampolines */
> + struct bpf_ksym **ksyms;
> u32 funcs_cnt;
> u32 image_pages_cnt;
> /* image_pages is an array of pages that has all the trampolines
> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
> return 0;
> }
>
> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
> + void *image, unsigned int size,
> + struct bpf_ksym *ksym)
> +{
> + snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
> + INIT_LIST_HEAD_RCU(&ksym->lnode);
> + bpf_image_ksym_init(image, size, ksym);
> +}
> +
> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
> +{
> + u32 i;
> +
> + for (i = 0; i < st_map->funcs_cnt; i++) {
> + if (!st_map->ksyms[i])
> + break;
> + bpf_image_ksym_add(st_map->ksyms[i]);
> + }
> +}
> +
> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> + u32 i;
> +
> + for (i = 0; i < st_map->funcs_cnt; i++) {
> + if (!st_map->ksyms[i])
> + break;
> + bpf_image_ksym_del(st_map->ksyms[i]);
> + }
> +}
> +
> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> + u32 i;
> +
> + for (i = 0; i < st_map->funcs_cnt; i++) {
> + if (!st_map->ksyms[i])
> + break;
> + kfree(st_map->ksyms[i]);
> + st_map->links[i] = NULL;
s/links/ksyms/
pw-bot: cr
> + }
> +}
> +
> static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> void *value, u64 flags)
> {
> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> u32 i, trampoline_start, image_off = 0;
> void *cur_image = NULL, *image = NULL;
> struct bpf_link **plink;
> + struct bpf_ksym **pksym;
> + const char *tname, *mname;
>
> if (flags)
> return -EINVAL;
> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> kdata = &kvalue->data;
>
> plink = st_map->links;
> + pksym = st_map->ksyms;
> + tname = btf_name_by_offset(st_map->btf, t->name_off);
> module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
> for_each_member(i, t, member) {
> const struct btf_type *mtype, *ptype;
> struct bpf_prog *prog;
> struct bpf_tramp_link *link;
> + struct bpf_ksym *ksym;
> u32 moff;
>
> moff = __btf_member_bit_offset(t, member) / 8;
> + mname = btf_name_by_offset(st_map->btf, member->name_off);
> ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
> if (ptype == module_type) {
> if (*(void **)(udata + moff))
> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> &bpf_struct_ops_link_lops, prog);
> *plink++ = &link->link;
>
> + ksym = kzalloc(sizeof(*ksym), GFP_USER);
link is also using kzalloc but probably both link and ksym allocation should use
bpf_map_kzalloc instead. This switch can be done for both together later as a
follow up patch.
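An untested sketch of what that switch could look like for the ksym
allocation here (the existing link kzalloc would change the same way):

	ksym = bpf_map_kzalloc(map, sizeof(*ksym), GFP_USER);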
> + if (!ksym) {
> + bpf_prog_put(prog);
afaik, this bpf_prog_put is not needed. The bpf_link_init above took the prog
ownership and the bpf_struct_ops_map_put_progs() at the error path will take
care of it.
> + err = -ENOMEM;
> + goto reset_unlock;
> + }
> + *pksym = ksym;
nit. Follow the *plink++ style above and do the same *pksym++ here.
> +
> trampoline_start = image_off;
> err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> &st_ops->func_models[i],
> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>
> /* put prog_id to udata */
> *(unsigned long *)(udata + moff) = prog->aux->id;
> +
> + /* init ksym for this trampoline */
> + bpf_struct_ops_ksym_init(tname, mname,
> + image + trampoline_start,
> + image_off - trampoline_start,
> + *pksym++);
then use "ksym" here.
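i.e. keeping everything else the same, roughly:

	*pksym++ = ksym;
	...
	/* init ksym for this trampoline */
	bpf_struct_ops_ksym_init(tname, mname,
				 image + trampoline_start,
				 image_off - trampoline_start,
				 ksym);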
> }
>
> if (st_ops->validate) {
> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> */
>
> reset_unlock:
> + bpf_struct_ops_map_free_ksyms(st_map);
> bpf_struct_ops_map_free_image(st_map);
> bpf_struct_ops_map_put_progs(st_map);
> memset(uvalue, 0, map->value_size);
> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> unlock:
> kfree(tlinks);
> mutex_unlock(&st_map->lock);
> + if (!err)
> + bpf_struct_ops_map_ksyms_add(st_map);
> return err;
> }
>
> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>
> if (st_map->links)
> bpf_struct_ops_map_put_progs(st_map);
> + if (st_map->ksyms)
> + bpf_struct_ops_map_free_ksyms(st_map);
> bpf_map_area_free(st_map->links);
> + bpf_map_area_free(st_map->ksyms);
> bpf_struct_ops_map_free_image(st_map);
> bpf_map_area_free(st_map->uvalue);
> bpf_map_area_free(st_map);
> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
> if (btf_is_module(st_map->btf))
> module_put(st_map->st_ops_desc->st_ops->owner);
>
> + if (st_map->ksyms)
This null test should not be needed.
> + bpf_struct_ops_map_del_ksyms(st_map);
> +
> /* The struct_ops's function may switch to another struct_ops.
> *
> * For example, bpf_tcp_cc_x->init() may switch to
> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
> st_map->links =
> bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
> NUMA_NO_NODE);
> - if (!st_map->uvalue || !st_map->links) {
> +
> + st_map->ksyms =
> + bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
> + NUMA_NO_NODE);
.map_mem_usage at least needs to include the st_map->ksyms[] pointer array.
funcs_cnt should also be used instead of btf_type_vlen(vt) for the links
array in .map_mem_usage.
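Roughly something like this in the struct_ops .map_mem_usage callback
(untested sketch, assuming the existing accounting there keeps a local
accumulator such as "usage"):

	usage += st_map->funcs_cnt *
		 (sizeof(struct bpf_link *) + sizeof(struct bpf_ksym *));

replacing the current btf_type_vlen(vt)-based sizing of the links array;
the per-trampoline struct bpf_ksym allocations could be counted on top
of that.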
> + if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
> ret = -ENOMEM;
> goto errout_free;
> }
* Re: [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline
2024-11-11 23:04 ` Martin KaFai Lau
@ 2024-11-12 12:06 ` Xu Kuohai
0 siblings, 0 replies; 6+ messages in thread
From: Xu Kuohai @ 2024-11-12 12:06 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: bpf, netdev, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, Yonghong Song, Kui-Feng Lee
On 11/12/2024 7:04 AM, Martin KaFai Lau wrote:
> On 11/11/24 4:16 AM, Xu Kuohai wrote:
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index e99fce81e916..d6dd56fc80d8 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>> struct bpf_struct_ops_map {
>> struct bpf_map map;
>> - struct rcu_head rcu;
>
> Since it needs a respin (more on it later), it will be useful to split this cleanup out as a separate patch in the same patch series.
>
OK
>> const struct bpf_struct_ops_desc *st_ops_desc;
>> /* protect map_update */
>> struct mutex lock;
>> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
>> * (in kvalue.data).
>> */
>> struct bpf_link **links;
>> + /* ksyms for bpf trampolines */
>> + struct bpf_ksym **ksyms;
>> u32 funcs_cnt;
>> u32 image_pages_cnt;
>> /* image_pages is an array of pages that has all the trampolines
>> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
>> return 0;
>> }
>> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
>> + void *image, unsigned int size,
>> + struct bpf_ksym *ksym)
>> +{
>> + snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
>> + INIT_LIST_HEAD_RCU(&ksym->lnode);
>> + bpf_image_ksym_init(image, size, ksym);
>> +}
>> +
>> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
>> +{
>> + u32 i;
>> +
>> + for (i = 0; i < st_map->funcs_cnt; i++) {
>> + if (!st_map->ksyms[i])
>> + break;
>> + bpf_image_ksym_add(st_map->ksyms[i]);
>> + }
>> +}
>> +
>> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
>> +{
>> + u32 i;
>> +
>> + for (i = 0; i < st_map->funcs_cnt; i++) {
>> + if (!st_map->ksyms[i])
>> + break;
>> + bpf_image_ksym_del(st_map->ksyms[i]);
>> + }
>> +}
>> +
>> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
>> +{
>> + u32 i;
>> +
>> + for (i = 0; i < st_map->funcs_cnt; i++) {
>> + if (!st_map->ksyms[i])
>> + break;
>> + kfree(st_map->ksyms[i]);
>> + st_map->links[i] = NULL;
>
> s/links/ksyms/
>
> pw-bot: cr
>
Oops, good catch
>> + }
>> +}
>> +
>> static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> void *value, u64 flags)
>> {
>> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> u32 i, trampoline_start, image_off = 0;
>> void *cur_image = NULL, *image = NULL;
>> struct bpf_link **plink;
>> + struct bpf_ksym **pksym;
>> + const char *tname, *mname;
>> if (flags)
>> return -EINVAL;
>> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> kdata = &kvalue->data;
>> plink = st_map->links;
>> + pksym = st_map->ksyms;
>> + tname = btf_name_by_offset(st_map->btf, t->name_off);
>> module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
>> for_each_member(i, t, member) {
>> const struct btf_type *mtype, *ptype;
>> struct bpf_prog *prog;
>> struct bpf_tramp_link *link;
>> + struct bpf_ksym *ksym;
>> u32 moff;
>> moff = __btf_member_bit_offset(t, member) / 8;
>> + mname = btf_name_by_offset(st_map->btf, member->name_off);
>> ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
>> if (ptype == module_type) {
>> if (*(void **)(udata + moff))
>> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> &bpf_struct_ops_link_lops, prog);
>> *plink++ = &link->link;
>> + ksym = kzalloc(sizeof(*ksym), GFP_USER);
>
> link is also using kzalloc but probably both link and ksym allocation should use bpf_map_kzalloc instead. This switch can be done for both together later as a follow up patch.
>
OK, will do.
>> + if (!ksym) {
>> + bpf_prog_put(prog);
>
> afaik, this bpf_prog_put is not needed. The bpf_link_init above took the prog ownership and the bpf_struct_ops_map_put_progs() at the error path will take care of it.
>
Right, good catch
>> + err = -ENOMEM;
>> + goto reset_unlock;
>> + }
>> + *pksym = ksym;
>
> nit. Follow the *plink++ style above and do the same *pksym++ here.
>
OK
>> +
>> trampoline_start = image_off;
>> err = bpf_struct_ops_prepare_trampoline(tlinks, link,
>> &st_ops->func_models[i],
>> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> /* put prog_id to udata */
>> *(unsigned long *)(udata + moff) = prog->aux->id;
>> +
>> + /* init ksym for this trampoline */
>> + bpf_struct_ops_ksym_init(tname, mname,
>> + image + trampoline_start,
>> + image_off - trampoline_start,
>> + *pksym++);
>
> then use "ksym" here.
>
>> }
>> if (st_ops->validate) {
>> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> */
>> reset_unlock:
>> + bpf_struct_ops_map_free_ksyms(st_map);
>> bpf_struct_ops_map_free_image(st_map);
>> bpf_struct_ops_map_put_progs(st_map);
>> memset(uvalue, 0, map->value_size);
>> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> unlock:
>> kfree(tlinks);
>> mutex_unlock(&st_map->lock);
>> + if (!err)
>> + bpf_struct_ops_map_ksyms_add(st_map);
>> return err;
>> }
>> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>> if (st_map->links)
>> bpf_struct_ops_map_put_progs(st_map);
>> + if (st_map->ksyms)
>> + bpf_struct_ops_map_free_ksyms(st_map);
>> bpf_map_area_free(st_map->links);
>> + bpf_map_area_free(st_map->ksyms);
>> bpf_struct_ops_map_free_image(st_map);
>> bpf_map_area_free(st_map->uvalue);
>> bpf_map_area_free(st_map);
>> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>> if (btf_is_module(st_map->btf))
>> module_put(st_map->st_ops_desc->st_ops->owner);
>> + if (st_map->ksyms)
>
> This null test should not be needed.
>
Agree, will remove it.
>> + bpf_struct_ops_map_del_ksyms(st_map);
>> +
>> /* The struct_ops's function may switch to another struct_ops.
>> *
>> * For example, bpf_tcp_cc_x->init() may switch to
>> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>> st_map->links =
>> bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
>> NUMA_NO_NODE);
>> - if (!st_map->uvalue || !st_map->links) {
>> +
>> + st_map->ksyms =
>> + bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
>> + NUMA_NO_NODE);
>
> .map_mem_usage at least needs to include the st_map->ksyms[] pointer array. funcs_cnt should also be used instead of btf_type_vlen(vt) for the links array in .map_mem_usage.
>
Right, agree
>> + if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
>> ret = -ENOMEM;
>> goto errout_free;
>> }