cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH cgroup/for-next 0/3] Improve clarity of cgroup destruction process
@ 2025-08-26  3:40 Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt Chen Ridong
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Chen Ridong @ 2025-08-26  3:40 UTC (permalink / raw)
  To: tj, hannes, mkoutny; +Cc: cgroups, linux-kernel, lujialin4, chenridong

From: Chen Ridong <chenridong@huawei.com>

This series aims to improve the clarity of the cgroup destruction
mechanism. Since the destruction process has been split across three
workqueues, this patch set introduces helper functions and renames
existing ones to enhance readability. As a result, the logic now follows
a more straightforward one-to-one correspondence, making the overall flow
easier to understand. Besides, patch 1 removes the redundancy online_cnt.

This series is based on 04a4d6c24eef8a1fc89d8b6129ac00ca2f638aff for the
cgroup/for-next branch.

Chen Ridong (3):
  cgroup: remove redundancy online_cnt
  cgroup: add css_free helper
  cgroup: rename css offline related functions

 include/linux/cgroup-defs.h |  6 ----
 kernel/cgroup/cgroup.c      | 59 ++++++++++++++++++-------------------
 kernel/cgroup/debug.c       |  2 +-
 3 files changed, 30 insertions(+), 37 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt
  2025-08-26  3:40 [PATCH cgroup/for-next 0/3] Improve clarity of cgroup destruction process Chen Ridong
@ 2025-08-26  3:40 ` Chen Ridong
  2025-08-26 14:14   ` Michal Koutný
  2025-08-27  0:59   ` Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 2/3] cgroup: add css_free helper Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 3/3] cgroup: rename css offline related functions Chen Ridong
  2 siblings, 2 replies; 7+ messages in thread
From: Chen Ridong @ 2025-08-26  3:40 UTC (permalink / raw)
  To: tj, hannes, mkoutny; +Cc: cgroups, linux-kernel, lujialin4, chenridong

From: Chen Ridong <chenridong@huawei.com>

Both online_cnt and nr_descendants can indicate whether acitive
descendants exist. To make code simple, remove redundancy online_cnt,
use nr_descendants instead.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 include/linux/cgroup-defs.h |  6 ------
 kernel/cgroup/cgroup.c      | 13 ++++---------
 kernel/cgroup/debug.c       |  2 +-
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 6b93a64115fe..d084c5c34c09 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -210,12 +210,6 @@ struct cgroup_subsys_state {
 	 */
 	u64 serial_nr;
 
-	/*
-	 * Incremented by online self and children.  Used to guarantee that
-	 * parents are not offlined before their children.
-	 */
-	atomic_t online_cnt;
-
 	/* percpu_ref killing and RCU release */
 	struct work_struct destroy_work;
 	struct rcu_work destroy_rwork;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 79b1d79f86a3..5eb747a038f7 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5607,7 +5607,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
 	css->serial_nr = css_serial_nr_next++;
-	atomic_set(&css->online_cnt, 0);
 
 	if (cgroup_parent(cgrp)) {
 		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -5631,12 +5630,8 @@ static int online_css(struct cgroup_subsys_state *css)
 		css->flags |= CSS_ONLINE;
 		rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
 
-		atomic_inc(&css->online_cnt);
-		if (css->parent) {
-			atomic_inc(&css->parent->online_cnt);
-			while ((css = css->parent))
-				css->nr_descendants++;
-		}
+		while ((css = css->parent))
+			css->nr_descendants++;
 	}
 	return ret;
 }
@@ -5949,7 +5944,7 @@ static void css_killed_work_fn(struct work_struct *work)
 		css_put(css);
 		/* @css can't go away while we're holding cgroup_mutex */
 		css = css->parent;
-	} while (css && atomic_dec_and_test(&css->online_cnt));
+	} while (css && css_is_dying(css) && !css->nr_descendants);
 
 	cgroup_unlock();
 }
@@ -5960,7 +5955,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
-	if (atomic_dec_and_test(&css->online_cnt)) {
+	if (!css->nr_descendants) {
 		INIT_WORK(&css->destroy_work, css_killed_work_fn);
 		queue_work(cgroup_offline_wq, &css->destroy_work);
 	}
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 80aa3f027ac3..fc2f3216173a 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -226,7 +226,7 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
 				 css->parent->id);
 		seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
 			  css, css->id,
-			  atomic_read(&css->online_cnt), pbuf);
+			  css->nr_descendants, pbuf);
 	}
 
 	cgroup_kn_unlock(of->kn);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH cgroup/for-next 2/3] cgroup: add css_free helper
  2025-08-26  3:40 [PATCH cgroup/for-next 0/3] Improve clarity of cgroup destruction process Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt Chen Ridong
@ 2025-08-26  3:40 ` Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 3/3] cgroup: rename css offline related functions Chen Ridong
  2 siblings, 0 replies; 7+ messages in thread
From: Chen Ridong @ 2025-08-26  3:40 UTC (permalink / raw)
  To: tj, hannes, mkoutny; +Cc: cgroups, linux-kernel, lujialin4, chenridong

From: Chen Ridong <chenridong@huawei.com>

The cgroup_destroy_wq has been split into three separate workqueues:
cgroup_offline_wq, cgroup_release_wq, and cgroup_free_wq. The cgroup
destroy work is now enqueued sequentially into these queues. To clarify
the three distinct stages of the destruction process, it will introduce
helper functions or rename existing ones accordingly. The resulting
structure is as follows:

work		workqueue		work_fn
css_offline	cgroup_offline_wq	css_offline_work_fn
css_release	cgroup_release_wq	css_release_work_fn
css_free	cgroup_free_wq		css_free_rwork_fn

This patch is to add css_free helper.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 kernel/cgroup/cgroup.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 5eb747a038f7..ad1f6a59a27b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5517,6 +5517,12 @@ static void css_free_rwork_fn(struct work_struct *work)
 	}
 }
 
+static void css_free(struct cgroup_subsys_state *css)
+{
+	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
+	queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
+}
+
 static void css_release_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
@@ -5580,8 +5586,7 @@ static void css_release_work_fn(struct work_struct *work)
 
 	cgroup_unlock();
 
-	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
-	queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
+	css_free(css);
 }
 
 static void css_release(struct percpu_ref *ref)
@@ -5718,8 +5723,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 err_list_del:
 	list_del_rcu(&css->sibling);
 err_free_css:
-	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
-	queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
+	css_free(css);
 	return ERR_PTR(err);
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH cgroup/for-next 3/3] cgroup: rename css offline related functions
  2025-08-26  3:40 [PATCH cgroup/for-next 0/3] Improve clarity of cgroup destruction process Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt Chen Ridong
  2025-08-26  3:40 ` [PATCH cgroup/for-next 2/3] cgroup: add css_free helper Chen Ridong
@ 2025-08-26  3:40 ` Chen Ridong
  2 siblings, 0 replies; 7+ messages in thread
From: Chen Ridong @ 2025-08-26  3:40 UTC (permalink / raw)
  To: tj, hannes, mkoutny; +Cc: cgroups, linux-kernel, lujialin4, chenridong

From: Chen Ridong <chenridong@huawei.com>

The cgroup_destroy_wq has been split into three separate workqueues:
cgroup_offline_wq, cgroup_release_wq, and cgroup_free_wq. The cgroup
destroy work is now enqueued sequentially into these queues. To clarify
the three distinct stages of the destruction process, it will introduce
helper functions or rename existing ones accordingly. The resulting
structure is as follows:

work            workqueue               work_fn
css_offline     cgroup_offline_wq       css_offline_work_fn
css_release     cgroup_release_wq       css_release_work_fn
css_free        cgroup_free_wq          css_free_rwork_fn

This patch renames css_killed_ref_fn to css_offline_work_fn and
css_killed_work_fn to css_offline_work_fn to align with the new
multi-phase destruction workflow.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 kernel/cgroup/cgroup.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ad1f6a59a27b..1c75ebad7a88 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5447,20 +5447,20 @@ static struct cftype cgroup_psi_files[] = {
 /*
  * css destruction is four-stage process.
  *
- * 1. Destruction starts.  Killing of the percpu_ref is initiated.
- *    Implemented in kill_css().
+ * 1. kill_css: Destruction starts.  Killing of the percpu_ref is
+ *    initiated. Implemented in kill_css().
  *
- * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
- *    and thus css_tryget_online() is guaranteed to fail, the css can be
- *    offlined by invoking offline_css().  After offlining, the base ref is
- *    put.  Implemented in css_killed_work_fn().
+ * 2. css_offline: When the percpu_ref is confirmed to be visible as
+ *    killed on all CPUs and thus css_tryget_online() is guaranteed to
+ *    fail, the css can be offlined by invoking __css_offline(). After
+ *    offlining, the base ref is put. Implemented in css_offline_work_fn().
  *
- * 3. When the percpu_ref reaches zero, the only possible remaining
- *    accessors are inside RCU read sections.  css_release() schedules the
- *    RCU callback.
+ * 3. css_release: When the percpu_ref reaches zero, the only possible
+ *    remaining accessors are inside RCU read sections. css_release()
+ *    schedules the RCU callback.
  *
- * 4. After the grace period, the css can be freed.  Implemented in
- *    css_free_rwork_fn().
+ * 4. css_free: After the grace period, the css can be freed. Implemented
+ *    in css_free_rwork_fn().
  *
  * It is actually hairier because both step 2 and 4 require process context
  * and thus involve punting to css->destroy_work adding two additional
@@ -5642,7 +5642,7 @@ static int online_css(struct cgroup_subsys_state *css)
 }
 
 /* if the CSS is online, invoke ->css_offline() on it and mark it offline */
-static void offline_css(struct cgroup_subsys_state *css)
+static void __css_offline(struct cgroup_subsys_state *css)
 {
 	struct cgroup_subsys *ss = css->ss;
 
@@ -5936,7 +5936,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
  * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
  * initiate destruction and put the css ref from kill_css().
  */
-static void css_killed_work_fn(struct work_struct *work)
+static void css_offline_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
@@ -5944,7 +5944,7 @@ static void css_killed_work_fn(struct work_struct *work)
 	cgroup_lock();
 
 	do {
-		offline_css(css);
+		__css_offline(css);
 		css_put(css);
 		/* @css can't go away while we're holding cgroup_mutex */
 		css = css->parent;
@@ -5954,13 +5954,13 @@ static void css_killed_work_fn(struct work_struct *work)
 }
 
 /* css kill confirmation processing requires process context, bounce */
-static void css_killed_ref_fn(struct percpu_ref *ref)
+static void css_offline(struct percpu_ref *ref)
 {
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
 	if (!css->nr_descendants) {
-		INIT_WORK(&css->destroy_work, css_killed_work_fn);
+		INIT_WORK(&css->destroy_work, css_offline_work_fn);
 		queue_work(cgroup_offline_wq, &css->destroy_work);
 	}
 }
@@ -6011,7 +6011,7 @@ static void kill_css(struct cgroup_subsys_state *css)
 	 * Use percpu_ref_kill_and_confirm() to get notifications as each
 	 * css is confirmed to be seen as killed on all CPUs.
 	 */
-	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
+	percpu_ref_kill_and_confirm(&css->refcnt, css_offline);
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt
  2025-08-26  3:40 ` [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt Chen Ridong
@ 2025-08-26 14:14   ` Michal Koutný
  2025-08-27  0:54     ` Chen Ridong
  2025-08-27  0:59   ` Chen Ridong
  1 sibling, 1 reply; 7+ messages in thread
From: Michal Koutný @ 2025-08-26 14:14 UTC (permalink / raw)
  To: Chen Ridong; +Cc: tj, hannes, cgroups, linux-kernel, lujialin4, chenridong

[-- Attachment #1: Type: text/plain, Size: 1216 bytes --]

Hello Ridong.

On Tue, Aug 26, 2025 at 03:40:20AM +0000, Chen Ridong <chenridong@huaweicloud.com> wrote:
> @@ -5949,7 +5944,7 @@ static void css_killed_work_fn(struct work_struct *work)
>  		css_put(css);
>  		/* @css can't go away while we're holding cgroup_mutex */
>  		css = css->parent;
> -	} while (css && atomic_dec_and_test(&css->online_cnt));
> +	} while (css && css_is_dying(css) && !css->nr_descendants);

Here it's OK...

>  
>  	cgroup_unlock();
>  }
> @@ -5960,7 +5955,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
>  	struct cgroup_subsys_state *css =
>  		container_of(ref, struct cgroup_subsys_state, refcnt);
>  
> -	if (atomic_dec_and_test(&css->online_cnt)) {
> +	if (!css->nr_descendants) {
>  		INIT_WORK(&css->destroy_work, css_killed_work_fn);
>  		queue_work(cgroup_offline_wq, &css->destroy_work);
>  	}

... but here in percpu_ref's confirm callback you're accessing
nr_descendants without cgroup_mutex where the atomic would have
prevented the data race.

Also the semantics of online_cnt and nr_descendants is slightly
different -- killed vs offlined. Or can you add a description why
they're same (after workqueue split)?

Thanks,
Michal

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 265 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt
  2025-08-26 14:14   ` Michal Koutný
@ 2025-08-27  0:54     ` Chen Ridong
  0 siblings, 0 replies; 7+ messages in thread
From: Chen Ridong @ 2025-08-27  0:54 UTC (permalink / raw)
  To: Michal Koutný
  Cc: tj, hannes, cgroups, linux-kernel, lujialin4, chenridong



On 2025/8/26 22:14, Michal Koutný wrote:
> Hello Ridong.
> 
> On Tue, Aug 26, 2025 at 03:40:20AM +0000, Chen Ridong <chenridong@huaweicloud.com> wrote:
>> @@ -5949,7 +5944,7 @@ static void css_killed_work_fn(struct work_struct *work)
>>  		css_put(css);
>>  		/* @css can't go away while we're holding cgroup_mutex */
>>  		css = css->parent;
>> -	} while (css && atomic_dec_and_test(&css->online_cnt));
>> +	} while (css && css_is_dying(css) && !css->nr_descendants);
> 
> Here it's OK...
> 
>>  
>>  	cgroup_unlock();
>>  }
>> @@ -5960,7 +5955,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
>>  	struct cgroup_subsys_state *css =
>>  		container_of(ref, struct cgroup_subsys_state, refcnt);
>>  
>> -	if (atomic_dec_and_test(&css->online_cnt)) {
>> +	if (!css->nr_descendants) {
>>  		INIT_WORK(&css->destroy_work, css_killed_work_fn);
>>  		queue_work(cgroup_offline_wq, &css->destroy_work);
>>  	}
> 
> ... but here in percpu_ref's confirm callback you're accessing
> nr_descendants without cgroup_mutex where the atomic would have
> prevented the data race.
> 

Thank you very much, Michal, I miss this case.

> Also the semantics of online_cnt and nr_descendants is slightly
> different -- killed vs offlined. Or can you add a description why
> they're same (after workqueue split)?
> 

The nr_descendants value does not include the dying CSS; it only reflects the number of currently
living descendants. Moreover, a CSS can only be taken offline when no living CSS remains. Therefore,
I believe the online_cnt is no longer necessary. This is unrelated to workqueue splitting.

-- 
Best regards,
Ridong


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt
  2025-08-26  3:40 ` [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt Chen Ridong
  2025-08-26 14:14   ` Michal Koutný
@ 2025-08-27  0:59   ` Chen Ridong
  1 sibling, 0 replies; 7+ messages in thread
From: Chen Ridong @ 2025-08-27  0:59 UTC (permalink / raw)
  To: tj, hannes, mkoutny; +Cc: cgroups, linux-kernel, lujialin4, chenridong



On 2025/8/26 11:40, Chen Ridong wrote:

> @@ -5949,7 +5944,7 @@ static void css_killed_work_fn(struct work_struct *work)
>  		css_put(css);
>  		/* @css can't go away while we're holding cgroup_mutex */
>  		css = css->parent;
> -	} while (css && atomic_dec_and_test(&css->online_cnt));
> +	} while (css && css_is_dying(css) && !css->nr_descendants);
>  
>  	cgroup_unlock();
>  }
> @@ -5960,7 +5955,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
>  	struct cgroup_subsys_state *css =
>  		container_of(ref, struct cgroup_subsys_state, refcnt);
>  
> -	if (atomic_dec_and_test(&css->online_cnt)) {
> +	if (!css->nr_descendants) {
>  		INIT_WORK(&css->destroy_work, css_killed_work_fn);
>  		queue_work(cgroup_offline_wq, &css->destroy_work);
>  	}

Hi Michal,

Thank you point out the data race issue, Can I modify the code just like:

@@ -5944,12 +5939,13 @@ static void css_killed_work_fn(struct work_struct *work)

        cgroup_lock();

-       do {
+       /* The CSS can only be taken offline when it has no living descendants. */
+       while (css && css_is_dying(css) && !css->nr_descendants) {
                offline_css(css);
                css_put(css);
                /* @css can't go away while we're holding cgroup_mutex */
                css = css->parent;
-       } while (css && atomic_dec_and_test(&css->online_cnt));
+       }

        cgroup_unlock();
 }
@@ -5960,10 +5956,9 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
        struct cgroup_subsys_state *css =
                container_of(ref, struct cgroup_subsys_state, refcnt);

-       if (atomic_dec_and_test(&css->online_cnt)) {
-               INIT_WORK(&css->destroy_work, css_killed_work_fn);
-               queue_work(cgroup_offline_wq, &css->destroy_work);
-       }
+       INIT_WORK(&css->destroy_work, css_killed_work_fn);
+       queue_work(cgroup_offline_wq, &css->destroy_work);
+
 }

-- 
Best regards,
Ridong


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-08-27  0:59 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-26  3:40 [PATCH cgroup/for-next 0/3] Improve clarity of cgroup destruction process Chen Ridong
2025-08-26  3:40 ` [PATCH cgroup/for-next 1/3] cgroup: remove redundancy online_cnt Chen Ridong
2025-08-26 14:14   ` Michal Koutný
2025-08-27  0:54     ` Chen Ridong
2025-08-27  0:59   ` Chen Ridong
2025-08-26  3:40 ` [PATCH cgroup/for-next 2/3] cgroup: add css_free helper Chen Ridong
2025-08-26  3:40 ` [PATCH cgroup/for-next 3/3] cgroup: rename css offline related functions Chen Ridong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).