All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kui-Feng Lee <thinker.li@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org, martin.lau@linux.dev,
	song@kernel.org, kernel-team@meta.com, andrii@kernel.org
Cc: sinquersw@gmail.com, kuifeng@meta.com,
	Kui-Feng Lee <thinker.li@gmail.com>
Subject: [PATCH bpf-next 3/6] bpf: provide a function to unregister struct_ops objects from consumers.
Date: Mon, 29 Apr 2024 14:36:06 -0700	[thread overview]
Message-ID: <20240429213609.487820-4-thinker.li@gmail.com> (raw)
In-Reply-To: <20240429213609.487820-1-thinker.li@gmail.com>

bpf_struct_ops_kvalue_unreg() unregisters the struct_ops map specified by
the pointer passed in. A subsystem could use this function to unregister a
struct_ops object that was previously registered to it.

In effect, bpf_struct_ops_kvalue_unreg() detaches the corresponding st_map
of an object from the link if there is one.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 include/linux/bpf.h         |  6 +++
 kernel/bpf/bpf_struct_ops.c | 97 ++++++++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8a1500764332..eeeed4b1bd32 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1793,6 +1793,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
 		module_put(owner);
 }
 int bpf_struct_ops_link_create(union bpf_attr *attr);
+bool bpf_struct_ops_kvalue_unreg(void *data);
 
 #ifdef CONFIG_NET
 /* Define it here to avoid the use of forward declaration */
@@ -1843,6 +1844,11 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
 {
 }
 
+static inline bool bpf_struct_ops_kvalue_unreg(void *data)
+{
+	return false;
+}
+
 #endif
 
 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 072e3416c987..8e79b02a1ccb 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -1077,9 +1077,6 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 	st_map = (struct bpf_struct_ops_map *)
 		rcu_dereference_protected(st_link->map, true);
 	if (st_map) {
-		/* st_link->map can be NULL if
-		 * bpf_struct_ops_link_create() fails to register.
-		 */
 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
 		map_attached_null(st_map);
 		bpf_map_put(&st_map->map);
@@ -1087,6 +1084,83 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 	kfree(st_link);
 }
 
+/* Called from the subsystem that consume the struct_ops.
+ *
+ * The caller should protected this function by holding rcu_read_lock() to
+ * ensure "data" is valid. However, this function may unlock rcu
+ * temporarily. The caller should not rely on the preceding rcu_read_lock()
+ * after returning from this function.
+ *
+ * Return true if unreg() success. If a call fails, it means some other
+ * task has unrgistered or is unregistering the same object.
+ */
+bool bpf_struct_ops_kvalue_unreg(void *data)
+{
+	struct bpf_struct_ops_map *st_map =
+		container_of(data, struct bpf_struct_ops_map, kvalue.data);
+	enum bpf_struct_ops_state prev_state;
+	struct bpf_struct_ops_link *st_link;
+	bool ret = false;
+
+	/* The st_map and st_link should be protected by rcu_read_lock(),
+	 * or they may have been free when we try to increase their
+	 * refcount.
+	 */
+	if (IS_ERR(bpf_map_inc_not_zero(&st_map->map)))
+		/* The map is already gone */
+		return false;
+
+	prev_state = cmpxchg(&st_map->kvalue.common.state,
+			     BPF_STRUCT_OPS_STATE_INUSE,
+			     BPF_STRUCT_OPS_STATE_TOBEFREE);
+	if (prev_state == BPF_STRUCT_OPS_STATE_INUSE) {
+		st_map->st_ops_desc->st_ops->unreg(data);
+		/* Pair with bpf_map_inc() for reg() */
+		bpf_map_put(&st_map->map);
+		/* Pair with bpf_map_inc_not_zero() above */
+		bpf_map_put(&st_map->map);
+		return true;
+	}
+	if (prev_state != BPF_STRUCT_OPS_STATE_READY)
+		goto fail;
+
+	/* With BPF_F_LINK */
+
+	st_link = rcu_dereference(st_map->attached);
+	if (!st_link || !bpf_link_inc_not_zero(&st_link->link))
+		/* The map is on the way to unregister */
+		goto fail;
+
+	rcu_read_unlock();
+	mutex_lock(&update_mutex);
+
+	if (rcu_dereference_protected(st_link->map, true) != &st_map->map)
+		/* The map should be unregistered already or on the way to
+		 * be unregistered.
+		 */
+		goto fail_unlock;
+
+	st_map->st_ops_desc->st_ops->unreg(data);
+
+	map_attached_null(st_map);
+	rcu_assign_pointer(st_link->map, NULL);
+	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
+	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
+	 */
+	bpf_map_put(&st_map->map);
+
+	ret = true;
+
+fail_unlock:
+	mutex_unlock(&update_mutex);
+	rcu_read_lock();
+	bpf_link_put(&st_link->link);
+fail:
+	bpf_map_put(&st_map->map);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bpf_struct_ops_kvalue_unreg);
+
 static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 					    struct seq_file *seq)
 {
@@ -1096,7 +1170,8 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	seq_printf(seq, "map_id:\t%d\n", map->id);
+	if (map)
+		seq_printf(seq, "map_id:\t%d\n", map->id);
 	rcu_read_unlock();
 }
 
@@ -1109,7 +1184,8 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	info->struct_ops.map_id = map->id;
+	if (map)
+		info->struct_ops.map_id = map->id;
 	rcu_read_unlock();
 	return 0;
 }
@@ -1134,6 +1210,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	mutex_lock(&update_mutex);
 
 	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (!old_map) {
+		err = -EINVAL;
+		goto err_out;
+	}
 	if (expected_old_map && old_map != expected_old_map) {
 		err = -EPERM;
 		goto err_out;
@@ -1214,14 +1294,19 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
 	if (err)
 		goto err_out_attached;
 
+	/* Init link->map before calling reg() in case being unregistered
+	 * immediately.
+	 */
+	RCU_INIT_POINTER(link->map, map);
+
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
 	if (err) {
+		rcu_assign_pointer(link->map, NULL);
 		bpf_link_cleanup(&link_primer);
 		/* The link has been free by bpf_link_cleanup() */
 		link = NULL;
 		goto err_out_attached;
 	}
-	RCU_INIT_POINTER(link->map, map);
 
 	return bpf_link_settle(&link_primer);
 
-- 
2.34.1


  parent reply	other threads:[~2024-04-29 21:36 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-29 21:36 [PATCH bpf-next 0/6] Notify user space when a struct_ops object is detached/unregisterd Kui-Feng Lee
2024-04-29 21:36 ` [PATCH bpf-next 1/6] bpf: add a pointer of the attached link to bpf_struct_ops_map Kui-Feng Lee
2024-05-01 17:01   ` Andrii Nakryiko
2024-05-01 22:15     ` Kui-Feng Lee
2024-04-29 21:36 ` [PATCH bpf-next 2/6] bpf: export bpf_link_inc_not_zero() Kui-Feng Lee
2024-04-29 21:36 ` Kui-Feng Lee [this message]
2024-05-01 18:48   ` [PATCH bpf-next 3/6] bpf: provide a function to unregister struct_ops objects from consumers Martin KaFai Lau
2024-05-01 22:15     ` Kui-Feng Lee
2024-05-01 23:06       ` Martin KaFai Lau
2024-05-02 17:56     ` Martin KaFai Lau
2024-05-02 18:29       ` Martin KaFai Lau
2024-05-03  0:41       ` Kui-Feng Lee
2024-05-03 16:19         ` Alexei Starovoitov
2024-05-03 18:09           ` Kui-Feng Lee
2024-05-03 17:17         ` Martin KaFai Lau
2024-04-29 21:36 ` [PATCH bpf-next 4/6] bpf: detach a bpf_struct_ops_map from a link Kui-Feng Lee
2024-04-29 21:36 ` [PATCH bpf-next 5/6] bpf: support epoll from bpf struct_ops links Kui-Feng Lee
2024-05-01 17:03   ` Andrii Nakryiko
2024-05-01 22:16     ` Kui-Feng Lee
2024-04-29 21:36 ` [PATCH bpf-next 6/6] selftests/bpf: test detaching " Kui-Feng Lee
2024-05-01 17:05   ` Andrii Nakryiko
2024-05-01 22:17     ` Kui-Feng Lee
2024-05-02 18:15   ` Martin KaFai Lau
2024-05-03 18:34     ` Kui-Feng Lee
2024-05-03 19:15       ` Martin KaFai Lau
2024-05-03 21:34         ` Kui-Feng Lee
2024-05-03 21:59           ` Martin KaFai Lau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240429213609.487820-4-thinker.li@gmail.com \
    --to=thinker.li@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kuifeng@meta.com \
    --cc=martin.lau@linux.dev \
    --cc=sinquersw@gmail.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.