* [PATCH bpf-next v6 1/8] bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects Kui-Feng Lee
` (6 subsequent siblings)
7 siblings, 0 replies; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Pass an additional pointer of bpf_struct_ops_link to callback function reg,
unreg, and update provided by subsystems defined in bpf_struct_ops. A
bpf_struct_ops_map can be registered for multiple links. Passing a pointer
of bpf_struct_ops_link helps subsystems to distinguish them.
This pointer will be used in the later patches to let the subsystem
initiate a detachment on a link that was registered to it previously.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
include/linux/bpf.h | 6 +++---
kernel/bpf/bpf_struct_ops.c | 10 +++++-----
net/bpf/bpf_dummy_struct_ops.c | 4 ++--
net/ipv4/bpf_tcp_ca.c | 6 +++---
.../selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 4 ++--
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 +++---
6 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 90094400cc63..b600767ebe02 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1730,9 +1730,9 @@ struct bpf_struct_ops {
int (*init_member)(const struct btf_type *t,
const struct btf_member *member,
void *kdata, const void *udata);
- int (*reg)(void *kdata);
- void (*unreg)(void *kdata);
- int (*update)(void *kdata, void *old_kdata);
+ int (*reg)(void *kdata, struct bpf_link *link);
+ void (*unreg)(void *kdata, struct bpf_link *link);
+ int (*update)(void *kdata, void *old_kdata, struct bpf_link *link);
int (*validate)(void *kdata);
void *cfi_stubs;
struct module *owner;
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 86c7884abaf8..1542dded7489 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -757,7 +757,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
- err = st_ops->reg(kdata);
+ err = st_ops->reg(kdata, NULL);
if (likely(!err)) {
/* This refcnt increment on the map here after
* 'st_ops->reg()' is secure since the state of the
@@ -805,7 +805,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
BPF_STRUCT_OPS_STATE_TOBEFREE);
switch (prev_state) {
case BPF_STRUCT_OPS_STATE_INUSE:
- st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL);
bpf_map_put(map);
return 0;
case BPF_STRUCT_OPS_STATE_TOBEFREE:
@@ -1060,7 +1060,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
/* st_link->map can be NULL if
* bpf_struct_ops_link_create() fails to register.
*/
- st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
bpf_map_put(&st_map->map);
}
kfree(st_link);
@@ -1125,7 +1125,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
goto err_out;
}
- err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
+ err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link);
if (err)
goto err_out;
@@ -1176,7 +1176,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
if (err)
goto err_out;
- err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
+ err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
if (err) {
bpf_link_cleanup(&link_primer);
link = NULL;
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 891cdf61c65a..3ea52b05adfb 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -272,12 +272,12 @@ static int bpf_dummy_init_member(const struct btf_type *t,
return -EOPNOTSUPP;
}
-static int bpf_dummy_reg(void *kdata)
+static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
{
return -EOPNOTSUPP;
}
-static void bpf_dummy_unreg(void *kdata)
+static void bpf_dummy_unreg(void *kdata, struct bpf_link *link)
{
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 18227757ec0c..3f88d0961e5b 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -260,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t,
return 0;
}
-static int bpf_tcp_ca_reg(void *kdata)
+static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link)
{
return tcp_register_congestion_control(kdata);
}
-static void bpf_tcp_ca_unreg(void *kdata)
+static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link)
{
tcp_unregister_congestion_control(kdata);
}
-static int bpf_tcp_ca_update(void *kdata, void *old_kdata)
+static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link)
{
return tcp_update_congestion_control(kdata, old_kdata);
}
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
index b1dd889d5d7d..948eb3962732 100644
--- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
@@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t,
return 0;
}
-static int dummy_reg(void *kdata)
+static int dummy_reg(void *kdata, struct bpf_link *link)
{
return 0;
}
-static void dummy_unreg(void *kdata)
+static void dummy_unreg(void *kdata, struct bpf_link *link)
{
}
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 2a18bd320e92..0a09732cde4b 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -820,7 +820,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
.is_valid_access = bpf_testmod_ops_is_valid_access,
};
-static int bpf_dummy_reg(void *kdata)
+static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
{
struct bpf_testmod_ops *ops = kdata;
@@ -835,7 +835,7 @@ static int bpf_dummy_reg(void *kdata)
return 0;
}
-static void bpf_dummy_unreg(void *kdata)
+static void bpf_dummy_unreg(void *kdata, struct bpf_link *link)
{
}
@@ -871,7 +871,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = {
.owner = THIS_MODULE,
};
-static int bpf_dummy_reg2(void *kdata)
+static int bpf_dummy_reg2(void *kdata, struct bpf_link *link)
{
struct bpf_testmod_ops2 *ops = kdata;
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 1/8] bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-29 6:17 ` Martin KaFai Lau
2024-05-24 22:30 ` [PATCH bpf-next v6 3/8] bpf: support epoll from bpf struct_ops links Kui-Feng Lee
` (5 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Implement the detach callback in bpf_link_ops for struct_ops so that user
programs can detach a struct_ops link. The subsystems that struct_ops
objects are registered to can also use this callback to detach the links
being passed to them.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
kernel/bpf/bpf_struct_ops.c | 53 ++++++++++++++++++++++++++++++++-----
1 file changed, 47 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 1542dded7489..f2439acd9757 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -1057,9 +1057,6 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
st_map = (struct bpf_struct_ops_map *)
rcu_dereference_protected(st_link->map, true);
if (st_map) {
- /* st_link->map can be NULL if
- * bpf_struct_ops_link_create() fails to register.
- */
st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
bpf_map_put(&st_map->map);
}
@@ -1075,7 +1072,8 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
st_link = container_of(link, struct bpf_struct_ops_link, link);
rcu_read_lock();
map = rcu_dereference(st_link->map);
- seq_printf(seq, "map_id:\t%d\n", map->id);
+ if (map)
+ seq_printf(seq, "map_id:\t%d\n", map->id);
rcu_read_unlock();
}
@@ -1088,7 +1086,8 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
st_link = container_of(link, struct bpf_struct_ops_link, link);
rcu_read_lock();
map = rcu_dereference(st_link->map);
- info->struct_ops.map_id = map->id;
+ if (map)
+ info->struct_ops.map_id = map->id;
rcu_read_unlock();
return 0;
}
@@ -1113,6 +1112,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
mutex_lock(&update_mutex);
old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+ if (!old_map) {
+ err = -ENOLINK;
+ goto err_out;
+ }
if (expected_old_map && old_map != expected_old_map) {
err = -EPERM;
goto err_out;
@@ -1139,8 +1142,37 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
return err;
}
+static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
+{
+ struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
+ struct bpf_struct_ops_map *st_map;
+ struct bpf_map *map;
+
+ mutex_lock(&update_mutex);
+
+ map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+ if (!map) {
+ mutex_unlock(&update_mutex);
+ return 0;
+ }
+ st_map = container_of(map, struct bpf_struct_ops_map, map);
+
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
+
+ rcu_assign_pointer(st_link->map, NULL);
+ /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
+ * bpf_map_inc() in bpf_struct_ops_map_link_update().
+ */
+ bpf_map_put(&st_map->map);
+
+ mutex_unlock(&update_mutex);
+
+ return 0;
+}
+
static const struct bpf_link_ops bpf_struct_ops_map_lops = {
.dealloc = bpf_struct_ops_map_link_dealloc,
+ .detach = bpf_struct_ops_map_link_detach,
.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
.update_map = bpf_struct_ops_map_link_update,
@@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
if (err)
goto err_out;
+ /* Init link->map before calling reg() in case being detached
+ * immediately.
+ */
+ RCU_INIT_POINTER(link->map, map);
+
+ mutex_lock(&update_mutex);
err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
if (err) {
+ RCU_INIT_POINTER(link->map, NULL);
+ mutex_unlock(&update_mutex);
bpf_link_cleanup(&link_primer);
+ /* The link has been free by bpf_link_cleanup() */
link = NULL;
goto err_out;
}
- RCU_INIT_POINTER(link->map, map);
+ mutex_unlock(&update_mutex);
return bpf_link_settle(&link_primer);
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects.
2024-05-24 22:30 ` [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects Kui-Feng Lee
@ 2024-05-29 6:17 ` Martin KaFai Lau
2024-05-29 15:04 ` Kuifeng Lee
0 siblings, 1 reply; 19+ messages in thread
From: Martin KaFai Lau @ 2024-05-29 6:17 UTC (permalink / raw)
To: Kui-Feng Lee; +Cc: bpf, ast, song, kernel-team, andrii, sinquersw, kuifeng
On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> +{
> + struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> + struct bpf_struct_ops_map *st_map;
> + struct bpf_map *map;
> +
> + mutex_lock(&update_mutex);
update_mutex is needed to detach.
> +
> + map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> + if (!map) {
> + mutex_unlock(&update_mutex);
> + return 0;
> + }
> + st_map = container_of(map, struct bpf_struct_ops_map, map);
> +
> + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> +
> + rcu_assign_pointer(st_link->map, NULL);
> + /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> + * bpf_map_inc() in bpf_struct_ops_map_link_update().
> + */
> + bpf_map_put(&st_map->map);
> +
> + mutex_unlock(&update_mutex);
> +
> + return 0;
> +}
> +
> static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> .dealloc = bpf_struct_ops_map_link_dealloc,
> + .detach = bpf_struct_ops_map_link_detach,
> .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> .update_map = bpf_struct_ops_map_link_update,
> @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
> if (err)
> goto err_out;
>
> + /* Init link->map before calling reg() in case being detached
> + * immediately.
> + */
With update_mutex held in link_create here, the parallel detach can still happen
before the link is fully initialized (the link->map pointer here in particular)?
> + RCU_INIT_POINTER(link->map, map);
> +
> + mutex_lock(&update_mutex);
> err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
> if (err) {
> + RCU_INIT_POINTER(link->map, NULL);
I was hoping by holding the the update_mutex, it can avoid this link->map init
dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
the error case.
> + mutex_unlock(&update_mutex);
> bpf_link_cleanup(&link_primer);
> + /* The link has been free by bpf_link_cleanup() */
> link = NULL;
> goto err_out;
> }
> - RCU_INIT_POINTER(link->map, map);
If only init link->map once here like the existing code (and the init is
protected by the update_mutex), the subsystem should not be able to detach until
the link->map is fully initialized.
or I am missing something obvious. Can you explain why this link->map init dance
is still needed?
> + mutex_unlock(&update_mutex);
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects.
2024-05-29 6:17 ` Martin KaFai Lau
@ 2024-05-29 15:04 ` Kuifeng Lee
2024-05-29 22:38 ` Martin KaFai Lau
0 siblings, 1 reply; 19+ messages in thread
From: Kuifeng Lee @ 2024-05-29 15:04 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Kui-Feng Lee, bpf, ast, song, kernel-team, andrii, kuifeng
On Tue, May 28, 2024 at 11:17 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> > +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> > +{
> > + struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> > + struct bpf_struct_ops_map *st_map;
> > + struct bpf_map *map;
> > +
> > + mutex_lock(&update_mutex);
>
> update_mutex is needed to detach.
>
> > +
> > + map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> > + if (!map) {
> > + mutex_unlock(&update_mutex);
> > + return 0;
> > + }
> > + st_map = container_of(map, struct bpf_struct_ops_map, map);
> > +
> > + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> > +
> > + rcu_assign_pointer(st_link->map, NULL);
> > + /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> > + * bpf_map_inc() in bpf_struct_ops_map_link_update().
> > + */
> > + bpf_map_put(&st_map->map);
> > +
> > + mutex_unlock(&update_mutex);
> > +
> > + return 0;
> > +}
> > +
> > static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> > .dealloc = bpf_struct_ops_map_link_dealloc,
> > + .detach = bpf_struct_ops_map_link_detach,
> > .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> > .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> > .update_map = bpf_struct_ops_map_link_update,
> > @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
> > if (err)
> > goto err_out;
> >
> > + /* Init link->map before calling reg() in case being detached
> > + * immediately.
> > + */
>
> With update_mutex held in link_create here, the parallel detach can still happen
> before the link is fully initialized (the link->map pointer here in particular)?
>
> > + RCU_INIT_POINTER(link->map, map);
> > +
> > + mutex_lock(&update_mutex);
> > err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
> > if (err) {
> > + RCU_INIT_POINTER(link->map, NULL);
>
> I was hoping by holding the the update_mutex, it can avoid this link->map init
> dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
> the error case.
>
> > + mutex_unlock(&update_mutex);
> > bpf_link_cleanup(&link_primer);
> > + /* The link has been free by bpf_link_cleanup() */
> > link = NULL;
> > goto err_out;
> > }
> > - RCU_INIT_POINTER(link->map, map);
>
> If only init link->map once here like the existing code (and the init is
> protected by the update_mutex), the subsystem should not be able to detach until
> the link->map is fully initialized.
>
> or I am missing something obvious. Can you explain why this link->map init dance
> is still needed?
Ok, I get what you mean.
I will move RCU_INIT_POINTER() back to its original place, and move the check
on the value of "err" to the place after mutex_unlock(). Is it what you like?
>
> > + mutex_unlock(&update_mutex);
>
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects.
2024-05-29 15:04 ` Kuifeng Lee
@ 2024-05-29 22:38 ` Martin KaFai Lau
2024-05-29 23:26 ` Kuifeng Lee
0 siblings, 1 reply; 19+ messages in thread
From: Martin KaFai Lau @ 2024-05-29 22:38 UTC (permalink / raw)
To: Kuifeng Lee; +Cc: Kui-Feng Lee, bpf, ast, song, kernel-team, andrii, kuifeng
On 5/29/24 8:04 AM, Kuifeng Lee wrote:
> On Tue, May 28, 2024 at 11:17 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
>>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>>> +{
>>> + struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
>>> + struct bpf_struct_ops_map *st_map;
>>> + struct bpf_map *map;
>>> +
>>> + mutex_lock(&update_mutex);
>>
>> update_mutex is needed to detach.
>>
>>> +
>>> + map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
>>> + if (!map) {
>>> + mutex_unlock(&update_mutex);
>>> + return 0;
>>> + }
>>> + st_map = container_of(map, struct bpf_struct_ops_map, map);
>>> +
>>> + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>>> +
>>> + rcu_assign_pointer(st_link->map, NULL);
>>> + /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>>> + * bpf_map_inc() in bpf_struct_ops_map_link_update().
>>> + */
>>> + bpf_map_put(&st_map->map);
>>> +
>>> + mutex_unlock(&update_mutex);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>> .dealloc = bpf_struct_ops_map_link_dealloc,
>>> + .detach = bpf_struct_ops_map_link_detach,
>>> .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>> .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>> .update_map = bpf_struct_ops_map_link_update,
>>> @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>>> if (err)
>>> goto err_out;
>>>
>>> + /* Init link->map before calling reg() in case being detached
>>> + * immediately.
>>> + */
>>
>> With update_mutex held in link_create here, the parallel detach can still happen
>> before the link is fully initialized (the link->map pointer here in particular)?
>>
>>> + RCU_INIT_POINTER(link->map, map);
>>> +
>>> + mutex_lock(&update_mutex);
>>> err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>>> if (err) {
>>> + RCU_INIT_POINTER(link->map, NULL);
>>
>> I was hoping by holding the the update_mutex, it can avoid this link->map init
>> dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
>> the error case.
>>
>>> + mutex_unlock(&update_mutex);
>>> bpf_link_cleanup(&link_primer);
>>> + /* The link has been free by bpf_link_cleanup() */
>>> link = NULL;
>>> goto err_out;
>>> }
>>> - RCU_INIT_POINTER(link->map, map);
>>
>> If only init link->map once here like the existing code (and the init is
>> protected by the update_mutex), the subsystem should not be able to detach until
>> the link->map is fully initialized.
>>
>> or I am missing something obvious. Can you explain why this link->map init dance
>> is still needed?
>
> Ok, I get what you mean.
>
> I will move RCU_INIT_POINTER() back to its original place, and move the check
> on the value of "err" to the place after mutext_unlock().
The RCU_INIT_POINTER(link->map, map) needs to be done with update_mutex held and
it should be init after the err check, so the err check needs to be inside
update_mutex lock also.
Something like this (untested):
mutex_lock(&update_mutex);
err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
if (err) {
mutex_unlock(&update_mutex);
bpf_link_cleanup(&link_primer);
link = NULL;
goto err_out;
}
RCU_INIT_POINTER(link->map, map);
mutex_unlock(&update_mutex);
>
>>
>>> + mutex_unlock(&update_mutex);
>>
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects.
2024-05-29 22:38 ` Martin KaFai Lau
@ 2024-05-29 23:26 ` Kuifeng Lee
0 siblings, 0 replies; 19+ messages in thread
From: Kuifeng Lee @ 2024-05-29 23:26 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Kui-Feng Lee, bpf, ast, song, kernel-team, andrii, kuifeng
On Wed, May 29, 2024 at 3:38 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 5/29/24 8:04 AM, Kuifeng Lee wrote:
> > On Tue, May 28, 2024 at 11:17 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
> >>
> >> On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> >>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> >>> +{
> >>> + struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> >>> + struct bpf_struct_ops_map *st_map;
> >>> + struct bpf_map *map;
> >>> +
> >>> + mutex_lock(&update_mutex);
> >>
> >> update_mutex is needed to detach.
> >>
> >>> +
> >>> + map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> >>> + if (!map) {
> >>> + mutex_unlock(&update_mutex);
> >>> + return 0;
> >>> + }
> >>> + st_map = container_of(map, struct bpf_struct_ops_map, map);
> >>> +
> >>> + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> >>> +
> >>> + rcu_assign_pointer(st_link->map, NULL);
> >>> + /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> >>> + * bpf_map_inc() in bpf_struct_ops_map_link_update().
> >>> + */
> >>> + bpf_map_put(&st_map->map);
> >>> +
> >>> + mutex_unlock(&update_mutex);
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> >>> .dealloc = bpf_struct_ops_map_link_dealloc,
> >>> + .detach = bpf_struct_ops_map_link_detach,
> >>> .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> >>> .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> >>> .update_map = bpf_struct_ops_map_link_update,
> >>> @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
> >>> if (err)
> >>> goto err_out;
> >>>
> >>> + /* Init link->map before calling reg() in case being detached
> >>> + * immediately.
> >>> + */
> >>
> >> With update_mutex held in link_create here, the parallel detach can still happen
> >> before the link is fully initialized (the link->map pointer here in particular)?
> >>
> >>> + RCU_INIT_POINTER(link->map, map);
> >>> +
> >>> + mutex_lock(&update_mutex);
> >>> err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
> >>> if (err) {
> >>> + RCU_INIT_POINTER(link->map, NULL);
> >>
> >> I was hoping by holding the the update_mutex, it can avoid this link->map init
> >> dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
> >> the error case.
> >>
> >>> + mutex_unlock(&update_mutex);
> >>> bpf_link_cleanup(&link_primer);
> >>> + /* The link has been free by bpf_link_cleanup() */
> >>> link = NULL;
> >>> goto err_out;
> >>> }
> >>> - RCU_INIT_POINTER(link->map, map);
> >>
> >> If only init link->map once here like the existing code (and the init is
> >> protected by the update_mutex), the subsystem should not be able to detach until
> >> the link->map is fully initialized.
> >>
> >> or I am missing something obvious. Can you explain why this link->map init dance
> >> is still needed?
> >
> > Ok, I get what you mean.
> >
> > I will move RCU_INIT_POINTER() back to its original place, and move the check
> > on the value of "err" to the place after mutext_unlock().
> The RCU_INIT_POINTER(link->map, map) needs to be done with update_mutex held and
> it should be init after the err check, so the err check needs to be inside
> update_mutex lock also.
>
> Something like this (untested):
>
> mutex_lock(&update_mutex);
>
> err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
> if (err) {
> mutex_unlock(&update_mutex);
> bpf_link_cleanup(&link_primer);
> link = NULL;
> goto err_out;
> }
> RCU_INIT_POINTER(link->map, map);
>
> mutex_unlock(&update_mutex);
>
Sure! According to what we discussed off-line, the RCU_INIT_POINTER()
will be moved
back to its original place. Subsystems should not try to access link->map.
>
> >
> >>
> >>> + mutex_unlock(&update_mutex);
> >>
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH bpf-next v6 3/8] bpf: support epoll from bpf struct_ops links.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 1/8] bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 2/8] bpf: enable detaching links of struct_ops objects Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 4/8] bpf: export bpf_link_inc_not_zero Kui-Feng Lee
` (4 subsequent siblings)
7 siblings, 0 replies; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Add epoll support to bpf struct_ops links to trigger EPOLLHUP event upon
detachment.
This patch implements the "poll" of the "struct file_operations" for BPF
links and introduces a new "poll" operator in the "struct bpf_link_ops". By
implementing "poll" of "struct bpf_link_ops" for the links of struct_ops,
the file descriptor of a struct_ops link can be added to an epoll file
descriptor to receive EPOLLHUP events.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
include/linux/bpf.h | 1 +
kernel/bpf/bpf_struct_ops.c | 17 +++++++++++++++++
kernel/bpf/syscall.c | 31 ++++++++++++++++++++++++++-----
3 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b600767ebe02..5f7496ef8b7c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1612,6 +1612,7 @@ struct bpf_link_ops {
struct bpf_link_info *info);
int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
struct bpf_map *old_map);
+ __poll_t (*poll)(struct file *file, struct poll_table_struct *pts);
};
struct bpf_tramp_link {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index f2439acd9757..855a1b2b6e79 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -12,6 +12,7 @@
#include <linux/mutex.h>
#include <linux/btf_ids.h>
#include <linux/rcupdate_wait.h>
+#include <linux/poll.h>
struct bpf_struct_ops_value {
struct bpf_struct_ops_common_value common;
@@ -56,6 +57,7 @@ struct bpf_struct_ops_map {
struct bpf_struct_ops_link {
struct bpf_link link;
struct bpf_map __rcu *map;
+ wait_queue_head_t wait_hup;
};
static DEFINE_MUTEX(update_mutex);
@@ -1167,15 +1169,28 @@ static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
mutex_unlock(&update_mutex);
+ wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP);
+
return 0;
}
+static __poll_t bpf_struct_ops_map_link_poll(struct file *file,
+ struct poll_table_struct *pts)
+{
+ struct bpf_struct_ops_link *st_link = file->private_data;
+
+ poll_wait(file, &st_link->wait_hup, pts);
+
+ return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP;
+}
+
static const struct bpf_link_ops bpf_struct_ops_map_lops = {
.dealloc = bpf_struct_ops_map_link_dealloc,
.detach = bpf_struct_ops_map_link_detach,
.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
.update_map = bpf_struct_ops_map_link_update,
+ .poll = bpf_struct_ops_map_link_poll,
};
int bpf_struct_ops_link_create(union bpf_attr *attr)
@@ -1213,6 +1228,8 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
*/
RCU_INIT_POINTER(link->map, map);
+ init_waitqueue_head(&link->wait_hup);
+
mutex_lock(&update_mutex);
err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
if (err) {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 13ad74ecf2cd..741f91153fe6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3150,6 +3150,13 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
}
#endif
+static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct bpf_link *link = file->private_data;
+
+ return link->ops->poll(file, pts);
+}
+
static const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
@@ -3159,6 +3166,16 @@ static const struct file_operations bpf_link_fops = {
.write = bpf_dummy_write,
};
+static const struct file_operations bpf_link_fops_poll = {
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_link_show_fdinfo,
+#endif
+ .release = bpf_link_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
+ .poll = bpf_link_poll,
+};
+
static int bpf_link_alloc_id(struct bpf_link *link)
{
int id;
@@ -3201,7 +3218,9 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
return id;
}
- file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
+ file = anon_inode_getfile("bpf_link",
+ link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
+ link, O_CLOEXEC);
if (IS_ERR(file)) {
bpf_link_free_id(id);
put_unused_fd(fd);
@@ -3229,7 +3248,9 @@ int bpf_link_settle(struct bpf_link_primer *primer)
int bpf_link_new_fd(struct bpf_link *link)
{
- return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
+ return anon_inode_getfd("bpf-link",
+ link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
+ link, O_CLOEXEC);
}
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
@@ -3239,7 +3260,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
if (!f.file)
return ERR_PTR(-EBADF);
- if (f.file->f_op != &bpf_link_fops) {
+ if (f.file->f_op != &bpf_link_fops && f.file->f_op != &bpf_link_fops_poll) {
fdput(f);
return ERR_PTR(-EINVAL);
}
@@ -4966,7 +4987,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
uattr);
else if (f.file->f_op == &btf_fops)
err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
- else if (f.file->f_op == &bpf_link_fops)
+ else if (f.file->f_op == &bpf_link_fops || f.file->f_op == &bpf_link_fops_poll)
err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
attr, uattr);
else
@@ -5101,7 +5122,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (!file)
return -EBADF;
- if (file->f_op == &bpf_link_fops) {
+ if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) {
struct bpf_link *link = file->private_data;
if (link->ops == &bpf_raw_tp_link_lops) {
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH bpf-next v6 4/8] bpf: export bpf_link_inc_not_zero.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
` (2 preceding siblings ...)
2024-05-24 22:30 ` [PATCH bpf-next v6 3/8] bpf: support epoll from bpf struct_ops links Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 5/8] selftests/bpf: test struct_ops with epoll Kui-Feng Lee
` (3 subsequent siblings)
7 siblings, 0 replies; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
bpf_link_inc_not_zero() will be used by kernel modules. We will use it in
bpf_testmod.c later.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
include/linux/bpf.h | 6 ++++++
kernel/bpf/syscall.c | 3 ++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5f7496ef8b7c..6b592094f9b4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2351,6 +2351,7 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
int bpf_link_settle(struct bpf_link_primer *primer);
void bpf_link_cleanup(struct bpf_link_primer *primer);
void bpf_link_inc(struct bpf_link *link);
+struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
@@ -2722,6 +2723,11 @@ static inline void bpf_link_inc(struct bpf_link *link)
{
}
+static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
+{
+ return NULL;
+}
+
static inline void bpf_link_put(struct bpf_link *link)
{
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 741f91153fe6..0d59de63540d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5432,10 +5432,11 @@ static int link_detach(union bpf_attr *attr)
return ret;
}
-static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
+struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}
+EXPORT_SYMBOL(bpf_link_inc_not_zero);
struct bpf_link *bpf_link_by_id(u32 id)
{
--
2.34.1
^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH bpf-next v6 5/8] selftests/bpf: test struct_ops with epoll
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
` (3 preceding siblings ...)
2024-05-24 22:30 ` [PATCH bpf-next v6 4/8] bpf: export bpf_link_inc_not_zero Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-29 22:26 ` Martin KaFai Lau
2024-05-24 22:30 ` [PATCH bpf-next v6 6/8] selftests/bpf: detach a struct_ops link from the subsystem managing it Kui-Feng Lee
` (2 subsequent siblings)
7 siblings, 1 reply; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Verify whether a user space program is informed through epoll with EPOLLHUP
when a struct_ops object is detached.
The BPF code in selftests/bpf/progs/struct_ops_module.c has become
complex. Therefore, struct_ops_detach.c has been added to segregate the BPF
code for detachment tests from the BPF code for other tests based on the
recommendation of Andrii Nakryiko.
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
.../bpf/prog_tests/test_struct_ops_module.c | 57 +++++++++++++++++++
.../selftests/bpf/progs/struct_ops_detach.c | 9 +++
2 files changed, 66 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_detach.c
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
index 29e183a80f49..bbcf12696a6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -3,9 +3,12 @@
#include <test_progs.h>
#include <time.h>
+#include <sys/epoll.h>
+
#include "struct_ops_module.skel.h"
#include "struct_ops_nulled_out_cb.skel.h"
#include "struct_ops_forgotten_cb.skel.h"
+#include "struct_ops_detach.skel.h"
static void check_map_info(struct bpf_map_info *info)
{
@@ -242,6 +245,58 @@ static void test_struct_ops_forgotten_cb(void)
struct_ops_forgotten_cb__destroy(skel);
}
+/* Detach a link from a user space program */
+static void test_detach_link(void)
+{
+ struct epoll_event ev, events[2];
+ struct struct_ops_detach *skel;
+ struct bpf_link *link = NULL;
+ int fd, epollfd = -1, nfds;
+ int err;
+
+ skel = struct_ops_detach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ fd = bpf_link__fd(link);
+ if (!ASSERT_GE(fd, 0, "link_fd"))
+ goto cleanup;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GE(epollfd, 0, "epoll_create1"))
+ goto cleanup;
+
+ ev.events = EPOLLHUP;
+ ev.data.fd = fd;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl"))
+ goto cleanup;
+
+ err = bpf_link__detach(link);
+ if (!ASSERT_OK(err, "detach_link"))
+ goto cleanup;
+
+ /* Wait for EPOLLHUP */
+ nfds = epoll_wait(epollfd, events, 2, 500);
+ if (!ASSERT_EQ(nfds, 1, "epoll_wait"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd"))
+ goto cleanup;
+ if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events"))
+ goto cleanup;
+
+cleanup:
+ if (epollfd >= 0)
+ close(epollfd);
+ bpf_link__destroy(link);
+ struct_ops_detach__destroy(skel);
+}
+
void serial_test_struct_ops_module(void)
{
if (test__start_subtest("struct_ops_load"))
@@ -254,5 +309,7 @@ void serial_test_struct_ops_module(void)
test_struct_ops_nulled_out_cb();
if (test__start_subtest("struct_ops_forgotten_cb"))
test_struct_ops_forgotten_cb();
+ if (test__start_subtest("test_detach_link"))
+ test_detach_link();
}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
new file mode 100644
index 000000000000..45eacc2ca657
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_do_detach;
--
2.34.1
^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH bpf-next v6 5/8] selftests/bpf: test struct_ops with epoll
2024-05-24 22:30 ` [PATCH bpf-next v6 5/8] selftests/bpf: test struct_ops with epoll Kui-Feng Lee
@ 2024-05-29 22:26 ` Martin KaFai Lau
0 siblings, 0 replies; 19+ messages in thread
From: Martin KaFai Lau @ 2024-05-29 22:26 UTC (permalink / raw)
To: Kui-Feng Lee; +Cc: bpf, ast, song, kernel-team, andrii, sinquersw, kuifeng
On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
> new file mode 100644
> index 000000000000..45eacc2ca657
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
> @@ -0,0 +1,9 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +#include <vmlinux.h>
> +#include "../bpf_testmod/bpf_testmod.h"
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC(".struct_ops.link")
> +struct bpf_testmod_ops testmod_do_detach
I was trying if the set can go without patch 6/7 but patch 5 cannot compile by
itself... :(
progs/struct_ops_detach.c:6:16: error: expected ';' after top level declarator
6 | char _license[] SEC("license") = "GPL";
| ^
| ;
progs/struct_ops_detach.c:8:5: error: expected parameter declarator
8 | SEC(".struct_ops.link")
pw-bot: cr
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH bpf-next v6 6/8] selftests/bpf: detach a struct_ops link from the subsystem managing it.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
` (4 preceding siblings ...)
2024-05-24 22:30 ` [PATCH bpf-next v6 5/8] selftests/bpf: test struct_ops with epoll Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-29 21:51 ` Martin KaFai Lau
2024-05-24 22:30 ` [PATCH bpf-next v6 7/8] selftests/bpf: make sure bpf_testmod handling racing link destroying well Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 8/8] bpftool: Change pid_iter.bpf.c to comply with the change of bpf_link_fops Kui-Feng Lee
7 siblings, 1 reply; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Not only a user space program can detach a struct_ops link, the subsystem
managing a link can also detach the link. This patch adds a kfunc to
simulate detaching a link by the subsystem managing it and makes sure user
space programs get notified through epoll.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
.../selftests/bpf/bpf_testmod/bpf_testmod.c | 42 ++++++++++++
.../bpf/bpf_testmod/bpf_testmod_kfunc.h | 1 +
.../bpf/prog_tests/test_struct_ops_module.c | 67 +++++++++++++++++++
.../selftests/bpf/progs/struct_ops_detach.c | 7 ++
4 files changed, 117 insertions(+)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 0a09732cde4b..2b3a89609b7e 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -744,6 +744,38 @@ __bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args)
return err;
}
+static DEFINE_SPINLOCK(detach_lock);
+static struct bpf_link *link_to_detach;
+
+__bpf_kfunc int bpf_dummy_do_link_detach(void)
+{
+ struct bpf_link *link;
+ int ret = -ENOENT;
+
+ /* A subsystem must ensure that a link is valid when detaching the
+ * link. In order to achieve that, the subsystem may need to obtain
+ * a lock to safeguard a table that holds the pointer to the link
+ * being detached. However, the subsystem cannot invoke
+ * link->ops->detach() while holding the lock because other tasks
+ * may be in the process of unregistering, which could lead to
+ * acquiring the same lock and causing a deadlock. This is why
+ * bpf_link_inc_not_zero() is used to maintain the link's validity.
+ */
+ spin_lock(&detach_lock);
+ link = link_to_detach;
+ /* Make sure the link is still valid by increasing its refcnt */
+ if (link && IS_ERR(bpf_link_inc_not_zero(link)))
+ link = NULL;
+ spin_unlock(&detach_lock);
+
+ if (link) {
+ ret = link->ops->detach(link);
+ bpf_link_put(link);
+ }
+
+ return ret;
+}
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -780,6 +812,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_dummy_do_link_detach)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -832,11 +865,20 @@ static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
if (ops->test_2)
ops->test_2(4, ops->data);
+ spin_lock(&detach_lock);
+ if (!link_to_detach)
+ link_to_detach = link;
+ spin_unlock(&detach_lock);
+
return 0;
}
static void bpf_dummy_unreg(void *kdata, struct bpf_link *link)
{
+ spin_lock(&detach_lock);
+ if (link == link_to_detach)
+ link_to_detach = NULL;
+ spin_unlock(&detach_lock);
}
static int bpf_testmod_test_1(void)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
index b0d586a6751f..19131baf4a9e 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -121,6 +121,7 @@ void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
+int bpf_dummy_do_link_detach(void) __ksym;
void bpf_kfunc_common_test(void) __ksym;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
index bbcf12696a6b..f4000bf04752 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <time.h>
+#include <network_helpers.h>
#include <sys/epoll.h>
@@ -297,6 +298,70 @@ static void test_detach_link(void)
struct_ops_detach__destroy(skel);
}
+/* Detach a link from the subsystem that the link was registered to */
+static void test_subsystem_detach(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4));
+ struct epoll_event ev, events[2];
+ struct struct_ops_detach *skel;
+ struct bpf_link *link = NULL;
+ int fd, epollfd = -1, nfds;
+ int prog_fd;
+ int err;
+
+ skel = struct_ops_detach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_detach_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ fd = bpf_link__fd(link);
+ if (!ASSERT_GE(fd, 0, "link_fd"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.start_detach);
+ if (!ASSERT_GE(prog_fd, 0, "start_detach_fd"))
+ goto cleanup;
+
+ /* Do detachment from the registered subsystem */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "start_detach_run"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(topts.retval, 0, "start_detach_run_retval"))
+ goto cleanup;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GE(epollfd, 0, "epoll_create1"))
+ goto cleanup;
+
+ ev.events = EPOLLHUP;
+ ev.data.fd = fd;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl"))
+ goto cleanup;
+
+ /* Wait for EPOLLHUP */
+ nfds = epoll_wait(epollfd, events, 2, 5000);
+ if (!ASSERT_EQ(nfds, 1, "epoll_wait"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd"))
+ goto cleanup;
+ if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events"))
+ goto cleanup;
+
+cleanup:
+ if (epollfd >= 0)
+ close(epollfd);
+ bpf_link__destroy(link);
+ struct_ops_detach__destroy(skel);
+}
+
void serial_test_struct_ops_module(void)
{
if (test__start_subtest("struct_ops_load"))
@@ -311,5 +376,7 @@ void serial_test_struct_ops_module(void)
test_struct_ops_forgotten_cb();
if (test__start_subtest("test_detach_link"))
test_detach_link();
+ if (test__start_subtest("test_subsystem_detach"))
+ test_subsystem_detach();
}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
index 45eacc2ca657..5c742b0df04d 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
@@ -2,8 +2,15 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include "../bpf_testmod/bpf_testmod.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
SEC(".struct_ops.link")
struct bpf_testmod_ops testmod_do_detach;
+
+SEC("tc")
+int start_detach(void *skb)
+{
+ return bpf_dummy_do_link_detach();
+}
--
2.34.1
^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH bpf-next v6 6/8] selftests/bpf: detach a struct_ops link from the subsystem managing it.
2024-05-24 22:30 ` [PATCH bpf-next v6 6/8] selftests/bpf: detach a struct_ops link from the subsystem managing it Kui-Feng Lee
@ 2024-05-29 21:51 ` Martin KaFai Lau
[not found] ` <CAHE2DV0RBf9JbkmngsdKdER5F2KmUXwY_JH44Z09DsY0VNa37A@mail.gmail.com>
0 siblings, 1 reply; 19+ messages in thread
From: Martin KaFai Lau @ 2024-05-29 21:51 UTC (permalink / raw)
To: Kui-Feng Lee; +Cc: bpf, ast, song, kernel-team, andrii, sinquersw, kuifeng
On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> @@ -832,11 +865,20 @@ static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
> if (ops->test_2)
> ops->test_2(4, ops->data);
>
> + spin_lock(&detach_lock);
> + if (!link_to_detach)
> + link_to_detach = link;
bpf_testmod_ops is used in a few different tests now. Can you check if
"./test_progs -j <num_of_parallel_workers>" will work considering link_to_detach
here is the very first registered link.
> + spin_unlock(&detach_lock);
> +
> return 0;
> }
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH bpf-next v6 7/8] selftests/bpf: make sure bpf_testmod handling racing link destroying well.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
` (5 preceding siblings ...)
2024-05-24 22:30 ` [PATCH bpf-next v6 6/8] selftests/bpf: detach a struct_ops link from the subsystem managing it Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
2024-05-24 22:30 ` [PATCH bpf-next v6 8/8] bpftool: Change pid_iter.bpf.c to comply with the change of bpf_link_fops Kui-Feng Lee
7 siblings, 0 replies; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Do detachment from the subsystem after a link being closed/freed. This
test makes sure the pattern implemented by bpf_dummy_do_link_detach() works
correctly.
Refer to bpf_dummy_do_link_detach() in bpf_testmod.c for more details.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
.../bpf/prog_tests/test_struct_ops_module.c | 44 +++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
index f4000bf04752..3a8cdf440edd 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -362,6 +362,48 @@ static void test_subsystem_detach(void)
struct_ops_detach__destroy(skel);
}
+/* A subsystem detaches a link while the link is going to be free. */
+static void test_subsystem_detach_free(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4));
+ struct struct_ops_detach *skel;
+ struct bpf_link *link = NULL;
+ int prog_fd;
+ int err;
+
+ skel = struct_ops_detach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_detach_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+
+ prog_fd = bpf_program__fd(skel->progs.start_detach);
+ if (!ASSERT_GE(prog_fd, 0, "start_detach_fd"))
+ goto cleanup;
+
+ /* Do detachment from the registered subsystem */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "start_detach_run"))
+ goto cleanup;
+
+ /* The link has zeroed refcount value or even has been
+ * unregistered, so the detachment from the subsystem should fail.
+ */
+ ASSERT_EQ(topts.retval, (u32)-ENOENT, "start_detach_run_retval");
+
+ /* Sync RCU to make sure the link is freed without any crash */
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+
+cleanup:
+ struct_ops_detach__destroy(skel);
+}
+
void serial_test_struct_ops_module(void)
{
if (test__start_subtest("struct_ops_load"))
@@ -378,5 +420,7 @@ void serial_test_struct_ops_module(void)
test_detach_link();
if (test__start_subtest("test_subsystem_detach"))
test_subsystem_detach();
+ if (test__start_subtest("test_subsystem_detach_free"))
+ test_subsystem_detach_free();
}
--
2.34.1
^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH bpf-next v6 8/8] bpftool: Change pid_iter.bpf.c to comply with the change of bpf_link_fops.
2024-05-24 22:30 [PATCH bpf-next v6 0/8] Notify user space when a struct_ops object is detached/unregistered Kui-Feng Lee
` (6 preceding siblings ...)
2024-05-24 22:30 ` [PATCH bpf-next v6 7/8] selftests/bpf: make sure bpf_testmod handling racing link destroying well Kui-Feng Lee
@ 2024-05-24 22:30 ` Kui-Feng Lee
7 siblings, 0 replies; 19+ messages in thread
From: Kui-Feng Lee @ 2024-05-24 22:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee, Quentin Monnet
To support epoll, a new instance of file_operations, bpf_link_fops_poll,
has been added for links that support epoll. The pid_iter.bpf.c checks
f_ops for links and other BPF objects. The check should fail for struct_ops
links without this patch.
Acked-by: Quentin Monnet <qmo@kernel.org>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
tools/bpf/bpftool/skeleton/pid_iter.bpf.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index 7bdbcac3cf62..948dde25034e 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -29,6 +29,7 @@ enum bpf_link_type___local {
};
extern const void bpf_link_fops __ksym;
+extern const void bpf_link_fops_poll __ksym __weak;
extern const void bpf_map_fops __ksym;
extern const void bpf_prog_fops __ksym;
extern const void btf_fops __ksym;
@@ -84,7 +85,11 @@ int iter(struct bpf_iter__task_file *ctx)
fops = &btf_fops;
break;
case BPF_OBJ_LINK:
- fops = &bpf_link_fops;
+ if (&bpf_link_fops_poll &&
+ file->f_op == &bpf_link_fops_poll)
+ fops = &bpf_link_fops_poll;
+ else
+ fops = &bpf_link_fops;
break;
default:
return 0;
--
2.34.1
^ permalink raw reply related [flat|nested] 19+ messages in thread