* [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner()
@ 2024-06-26 15:28 Oleg Nesterov
2024-06-26 15:29 ` [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic Oleg Nesterov
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Oleg Nesterov @ 2024-06-26 15:28 UTC (permalink / raw)
To: Andrew Morton, Michal Hocko
Cc: Christian Brauner, Eric W. Biederman, Jens Axboe, Jinliang Zheng,
Mateusz Guzik, Matthew Wilcox, Tycho Andersen, linux-kernel
On top of mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch
Michal, et al, could you review ? Compile tested only.
Oleg.
kernel/exit.c | 81 +++++++++++++++++++++++++++++++----------------------------
1 file changed, 43 insertions(+), 38 deletions(-)
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic
2024-06-26 15:28 [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Oleg Nesterov
@ 2024-06-26 15:29 ` Oleg Nesterov
2024-06-27 7:56 ` Michal Hocko
2024-06-26 15:29 ` [PATCH 2/2] memcg: mm_update_next_owner: move for_each_thread() into try_to_set_owner() Oleg Nesterov
2024-06-26 19:24 ` [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Andrew Morton
2 siblings, 1 reply; 9+ messages in thread
From: Oleg Nesterov @ 2024-06-26 15:29 UTC (permalink / raw)
To: Andrew Morton, Michal Hocko
Cc: Christian Brauner, Eric W. Biederman, Jens Axboe, Jinliang Zheng,
Mateusz Guzik, Matthew Wilcox, Tycho Andersen, linux-kernel
Add the new helper, try_to_set_owner(), which tries to update mm->owner
once we see c->mm == mm. This way mm_update_next_owner() doesn't need to
restart the list_for_each_entry/for_each_process loops from the very
beginning if it races with exit/exec, it can just continue.
Unlike the current code, try_to_set_owner() re-checks tsk->mm == mm
before it drops tasklist_lock, so it doesn't need get/put_task_struct().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
kernel/exit.c | 57 ++++++++++++++++++++++++---------------------------
1 file changed, 27 insertions(+), 30 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index 45210443e68d..a1ef5f23d5be 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -439,6 +439,23 @@ static void coredump_task_exit(struct task_struct *tsk)
}
#ifdef CONFIG_MEMCG
+/* drops tasklist_lock if succeeds */
+static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+{
+ bool ret = false;
+
+ task_lock(tsk);
+ if (likely(tsk->mm == mm)) {
+ /* tsk can't pass exit_mm/exec_mmap and exit */
+ read_unlock(&tasklist_lock);
+ WRITE_ONCE(mm->owner, tsk);
+ lru_gen_migrate_mm(mm);
+ ret = true;
+ }
+ task_unlock(tsk);
+ return ret;
+}
+
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
@@ -446,7 +463,6 @@ void mm_update_next_owner(struct mm_struct *mm)
{
struct task_struct *c, *g, *p = current;
-retry:
/*
* If the exiting or execing task is not the owner, it's
* someone else's problem.
@@ -468,16 +484,16 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search in the children
*/
list_for_each_entry(c, &p->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ if (c->mm == mm && try_to_set_owner(c, mm))
+ goto ret;
}
/*
* Search in the siblings
*/
list_for_each_entry(c, &p->real_parent->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ if (c->mm == mm && try_to_set_owner(c, mm))
+ goto ret;
}
/*
@@ -489,9 +505,11 @@ void mm_update_next_owner(struct mm_struct *mm)
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
- if (c->mm == mm)
- goto assign_new_owner;
- if (c->mm)
+ struct mm_struct *c_mm = READ_ONCE(c->mm);
+ if (c_mm == mm) {
+ if (try_to_set_owner(c, mm))
+ goto ret;
+ } else if (c_mm)
break;
}
}
@@ -502,30 +520,9 @@ void mm_update_next_owner(struct mm_struct *mm)
* ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
WRITE_ONCE(mm->owner, NULL);
+ ret:
return;
-assign_new_owner:
- BUG_ON(c == p);
- get_task_struct(c);
- /*
- * The task_lock protects c->mm from changing.
- * We always want mm->owner->mm == mm
- */
- task_lock(c);
- /*
- * Delay read_unlock() till we have the task_lock()
- * to ensure that c does not slip away underneath us
- */
- read_unlock(&tasklist_lock);
- if (c->mm != mm) {
- task_unlock(c);
- put_task_struct(c);
- goto retry;
- }
- WRITE_ONCE(mm->owner, c);
- lru_gen_migrate_mm(mm);
- task_unlock(c);
- put_task_struct(c);
}
#endif /* CONFIG_MEMCG */
--
2.25.1.362.g51ebf55
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] memcg: mm_update_next_owner: move for_each_thread() into try_to_set_owner()
2024-06-26 15:28 [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Oleg Nesterov
2024-06-26 15:29 ` [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic Oleg Nesterov
@ 2024-06-26 15:29 ` Oleg Nesterov
2024-06-27 9:02 ` Michal Hocko
2024-06-26 19:24 ` [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Andrew Morton
2 siblings, 1 reply; 9+ messages in thread
From: Oleg Nesterov @ 2024-06-26 15:29 UTC (permalink / raw)
To: Andrew Morton, Michal Hocko
Cc: Christian Brauner, Eric W. Biederman, Jens Axboe, Jinliang Zheng,
Mateusz Guzik, Matthew Wilcox, Tycho Andersen, linux-kernel
mm_update_next_owner() checks the children / real_parent->children to
avoid the "everything else" loop in the likely case, but this won't work
if a child/sibling has a zombie leader with ->mm == NULL.
Move the for_each_thread() logic into try_to_set_owner(), if nothing else
this makes the children/siblings/everything searches more consistent.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
kernel/exit.c | 40 ++++++++++++++++++++++++----------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index a1ef5f23d5be..cc56edc1103e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -440,7 +440,7 @@ static void coredump_task_exit(struct task_struct *tsk)
#ifdef CONFIG_MEMCG
/* drops tasklist_lock if succeeds */
-static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+static bool __try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = false;
@@ -456,12 +456,28 @@ static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
return ret;
}
+static bool try_to_set_owner(struct task_struct *g, struct mm_struct *mm)
+{
+ struct task_struct *t;
+
+ for_each_thread(g, t) {
+ struct mm_struct *t_mm = READ_ONCE(t->mm);
+ if (t_mm == mm) {
+ if (__try_to_set_owner(t, mm))
+ return true;
+ } else if (t_mm)
+ break;
+ }
+
+ return false;
+}
+
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
void mm_update_next_owner(struct mm_struct *mm)
{
- struct task_struct *c, *g, *p = current;
+ struct task_struct *g, *p = current;
/*
* If the exiting or execing task is not the owner, it's
@@ -483,19 +499,17 @@ void mm_update_next_owner(struct mm_struct *mm)
/*
* Search in the children
*/
- list_for_each_entry(c, &p->children, sibling) {
- if (c->mm == mm && try_to_set_owner(c, mm))
+ list_for_each_entry(g, &p->children, sibling) {
+ if (try_to_set_owner(g, mm))
goto ret;
}
-
/*
* Search in the siblings
*/
- list_for_each_entry(c, &p->real_parent->children, sibling) {
- if (c->mm == mm && try_to_set_owner(c, mm))
+ list_for_each_entry(g, &p->real_parent->children, sibling) {
+ if (try_to_set_owner(g, mm))
goto ret;
}
-
/*
* Search through everything else, we should not get here often.
*/
@@ -504,14 +518,8 @@ void mm_update_next_owner(struct mm_struct *mm)
break;
if (g->flags & PF_KTHREAD)
continue;
- for_each_thread(g, c) {
- struct mm_struct *c_mm = READ_ONCE(c->mm);
- if (c_mm == mm) {
- if (try_to_set_owner(c, mm))
- goto ret;
- } else if (c_mm)
- break;
- }
+ if (try_to_set_owner(g, mm))
+ goto ret;
}
read_unlock(&tasklist_lock);
/*
--
2.25.1.362.g51ebf55
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner()
2024-06-26 15:28 [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Oleg Nesterov
2024-06-26 15:29 ` [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic Oleg Nesterov
2024-06-26 15:29 ` [PATCH 2/2] memcg: mm_update_next_owner: move for_each_thread() into try_to_set_owner() Oleg Nesterov
@ 2024-06-26 19:24 ` Andrew Morton
2024-06-26 20:27 ` Oleg Nesterov
2 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2024-06-26 19:24 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Michal Hocko, Christian Brauner, Eric W. Biederman, Jens Axboe,
Jinliang Zheng, Mateusz Guzik, Matthew Wilcox, Tycho Andersen,
linux-kernel
On Wed, 26 Jun 2024 17:28:35 +0200 Oleg Nesterov <oleg@redhat.com> wrote:
> On top of mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch
What should we do with
mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch? It
prevents a hard lockup splat so I'm inclined to merge it into 6.10-rcX
and cc:stable. Further improvements (this series) can be made in the
normal fashion?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner()
2024-06-26 19:24 ` [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Andrew Morton
@ 2024-06-26 20:27 ` Oleg Nesterov
0 siblings, 0 replies; 9+ messages in thread
From: Oleg Nesterov @ 2024-06-26 20:27 UTC (permalink / raw)
To: Andrew Morton
Cc: Michal Hocko, Christian Brauner, Eric W. Biederman, Jens Axboe,
Jinliang Zheng, Mateusz Guzik, Matthew Wilcox, Tycho Andersen,
linux-kernel
On 06/26, Andrew Morton wrote:
>
> On Wed, 26 Jun 2024 17:28:35 +0200 Oleg Nesterov <oleg@redhat.com> wrote:
>
> > On top of mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch
>
> What should we do with
> mm-optimize-the-redundant-loop-of-mm_update_owner_next.patch?
Well, if you ask me (although you shouldn't ;) I think this patch makes
sense even if it can't fix all the problems.
> Further improvements (this series) can be made in the
> normal fashion?
Yes, yes. And just in case, this series mostly tries to cleanup this code
and it doesn't really depend on that patch from Jinliang.
Oleg.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic
2024-06-26 15:29 ` [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic Oleg Nesterov
@ 2024-06-27 7:56 ` Michal Hocko
2024-06-27 8:29 ` Oleg Nesterov
0 siblings, 1 reply; 9+ messages in thread
From: Michal Hocko @ 2024-06-27 7:56 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Andrew Morton, Christian Brauner, Eric W. Biederman, Jens Axboe,
Jinliang Zheng, Mateusz Guzik, Matthew Wilcox, Tycho Andersen,
linux-kernel
On Wed 26-06-24 17:29:24, Oleg Nesterov wrote:
> @@ -446,7 +463,6 @@ void mm_update_next_owner(struct mm_struct *mm)
> {
> struct task_struct *c, *g, *p = current;
>
> -retry:
> /*
> * If the exiting or execing task is not the owner, it's
> * someone else's problem.
> @@ -468,16 +484,16 @@ void mm_update_next_owner(struct mm_struct *mm)
> * Search in the children
> */
> list_for_each_entry(c, &p->children, sibling) {
> - if (c->mm == mm)
> - goto assign_new_owner;
> + if (c->mm == mm && try_to_set_owner(c, mm))
> + goto ret;
You need to unlock tasklist_lock, right? Same for other goto ret.
Other than that the cleanup makes sense and it makes the code much more
easier to follow. It should still die but it can do so in a better
shape.
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic
2024-06-27 7:56 ` Michal Hocko
@ 2024-06-27 8:29 ` Oleg Nesterov
2024-06-27 9:02 ` Michal Hocko
0 siblings, 1 reply; 9+ messages in thread
From: Oleg Nesterov @ 2024-06-27 8:29 UTC (permalink / raw)
To: Michal Hocko
Cc: Andrew Morton, Christian Brauner, Eric W. Biederman, Jens Axboe,
Jinliang Zheng, Mateusz Guzik, Matthew Wilcox, Tycho Andersen,
linux-kernel
Michal, thanks for looking at this,
On 06/27, Michal Hocko wrote:
>
> On Wed 26-06-24 17:29:24, Oleg Nesterov wrote:
> > @@ -446,7 +463,6 @@ void mm_update_next_owner(struct mm_struct *mm)
> > {
> > struct task_struct *c, *g, *p = current;
> >
> > -retry:
> > /*
> > * If the exiting or execing task is not the owner, it's
> > * someone else's problem.
> > @@ -468,16 +484,16 @@ void mm_update_next_owner(struct mm_struct *mm)
> > * Search in the children
> > */
> > list_for_each_entry(c, &p->children, sibling) {
> > - if (c->mm == mm)
> > - goto assign_new_owner;
> > + if (c->mm == mm && try_to_set_owner(c, mm))
> > + goto ret;
>
> You need to unlock tasklist_lock, right? Same for other goto ret.
No. From the patch
+/* drops tasklist_lock if succeeds */
+static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+{
+ bool ret = false;
+
+ task_lock(tsk);
+ if (likely(tsk->mm == mm)) {
+ /* tsk can't pass exit_mm/exec_mmap and exit */
+ read_unlock(&tasklist_lock);
^^^^^^^^^^^^^^^^^^^^^^^^^^^
try_to_set_owner() drops tasklist right after it verifies that
tsk->mm == mm under task_lock().
> It should still die but it can do so in a better shape.
Agreed!
Oleg.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic
2024-06-27 8:29 ` Oleg Nesterov
@ 2024-06-27 9:02 ` Michal Hocko
0 siblings, 0 replies; 9+ messages in thread
From: Michal Hocko @ 2024-06-27 9:02 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Andrew Morton, Christian Brauner, Eric W. Biederman, Jens Axboe,
Jinliang Zheng, Mateusz Guzik, Matthew Wilcox, Tycho Andersen,
linux-kernel
On Thu 27-06-24 10:29:42, Oleg Nesterov wrote:
> Michal, thanks for looking at this,
>
> On 06/27, Michal Hocko wrote:
> >
> > On Wed 26-06-24 17:29:24, Oleg Nesterov wrote:
> > > @@ -446,7 +463,6 @@ void mm_update_next_owner(struct mm_struct *mm)
> > > {
> > > struct task_struct *c, *g, *p = current;
> > >
> > > -retry:
> > > /*
> > > * If the exiting or execing task is not the owner, it's
> > > * someone else's problem.
> > > @@ -468,16 +484,16 @@ void mm_update_next_owner(struct mm_struct *mm)
> > > * Search in the children
> > > */
> > > list_for_each_entry(c, &p->children, sibling) {
> > > - if (c->mm == mm)
> > > - goto assign_new_owner;
> > > + if (c->mm == mm && try_to_set_owner(c, mm))
> > > + goto ret;
> >
> > You need to unlock tasklist_lock, right? Same for other goto ret.
>
> No. From the patch
>
> +/* drops tasklist_lock if succeeds */
> +static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
> +{
> + bool ret = false;
> +
> + task_lock(tsk);
> + if (likely(tsk->mm == mm)) {
> + /* tsk can't pass exit_mm/exec_mmap and exit */
> + read_unlock(&tasklist_lock);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> try_to_set_owner() drops tasklist right after it verifies that
> tsk->mm == mm under task_lock().
Yes, I am blind and the comment even explains that. I am not a proponent
of schemes where locks are released in a different function. But the
overall simplification here is worth that.
Acked-by: Michal Hocko <mhocko@suse.com>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] memcg: mm_update_next_owner: move for_each_thread() into try_to_set_owner()
2024-06-26 15:29 ` [PATCH 2/2] memcg: mm_update_next_owner: move for_each_thread() into try_to_set_owner() Oleg Nesterov
@ 2024-06-27 9:02 ` Michal Hocko
0 siblings, 0 replies; 9+ messages in thread
From: Michal Hocko @ 2024-06-27 9:02 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Andrew Morton, Christian Brauner, Eric W. Biederman, Jens Axboe,
Jinliang Zheng, Mateusz Guzik, Matthew Wilcox, Tycho Andersen,
linux-kernel
On Wed 26-06-24 17:29:30, Oleg Nesterov wrote:
> mm_update_next_owner() checks the children / real_parent->children to
> avoid the "everything else" loop in the likely case, but this won't work
> if a child/sibling has a zombie leader with ->mm == NULL.
>
> Move the for_each_thread() logic into try_to_set_owner(), if nothing else
> this makes the children/siblings/everything searches more consistent.
>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
> ---
> kernel/exit.c | 40 ++++++++++++++++++++++++----------------
> 1 file changed, 24 insertions(+), 16 deletions(-)
>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index a1ef5f23d5be..cc56edc1103e 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -440,7 +440,7 @@ static void coredump_task_exit(struct task_struct *tsk)
>
> #ifdef CONFIG_MEMCG
> /* drops tasklist_lock if succeeds */
> -static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
> +static bool __try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
> {
> bool ret = false;
>
> @@ -456,12 +456,28 @@ static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
> return ret;
> }
>
> +static bool try_to_set_owner(struct task_struct *g, struct mm_struct *mm)
> +{
> + struct task_struct *t;
> +
> + for_each_thread(g, t) {
> + struct mm_struct *t_mm = READ_ONCE(t->mm);
> + if (t_mm == mm) {
> + if (__try_to_set_owner(t, mm))
> + return true;
> + } else if (t_mm)
> + break;
> + }
> +
> + return false;
> +}
> +
> /*
> * A task is exiting. If it owned this mm, find a new owner for the mm.
> */
> void mm_update_next_owner(struct mm_struct *mm)
> {
> - struct task_struct *c, *g, *p = current;
> + struct task_struct *g, *p = current;
>
> /*
> * If the exiting or execing task is not the owner, it's
> @@ -483,19 +499,17 @@ void mm_update_next_owner(struct mm_struct *mm)
> /*
> * Search in the children
> */
> - list_for_each_entry(c, &p->children, sibling) {
> - if (c->mm == mm && try_to_set_owner(c, mm))
> + list_for_each_entry(g, &p->children, sibling) {
> + if (try_to_set_owner(g, mm))
> goto ret;
> }
> -
> /*
> * Search in the siblings
> */
> - list_for_each_entry(c, &p->real_parent->children, sibling) {
> - if (c->mm == mm && try_to_set_owner(c, mm))
> + list_for_each_entry(g, &p->real_parent->children, sibling) {
> + if (try_to_set_owner(g, mm))
> goto ret;
> }
> -
> /*
> * Search through everything else, we should not get here often.
> */
> @@ -504,14 +518,8 @@ void mm_update_next_owner(struct mm_struct *mm)
> break;
> if (g->flags & PF_KTHREAD)
> continue;
> - for_each_thread(g, c) {
> - struct mm_struct *c_mm = READ_ONCE(c->mm);
> - if (c_mm == mm) {
> - if (try_to_set_owner(c, mm))
> - goto ret;
> - } else if (c_mm)
> - break;
> - }
> + if (try_to_set_owner(g, mm))
> + goto ret;
> }
> read_unlock(&tasklist_lock);
> /*
> --
> 2.25.1.362.g51ebf55
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-06-27 9:02 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-26 15:28 [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Oleg Nesterov
2024-06-26 15:29 ` [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic Oleg Nesterov
2024-06-27 7:56 ` Michal Hocko
2024-06-27 8:29 ` Oleg Nesterov
2024-06-27 9:02 ` Michal Hocko
2024-06-26 15:29 ` [PATCH 2/2] memcg: mm_update_next_owner: move for_each_thread() into try_to_set_owner() Oleg Nesterov
2024-06-27 9:02 ` Michal Hocko
2024-06-26 19:24 ` [PATCH -mm 0/2] memcg: deuglify mm_update_next_owner() Andrew Morton
2024-06-26 20:27 ` Oleg Nesterov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox