All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ptrace: prefer live sibling mm over user_dumpable when task->mm is NULL
@ 2026-05-15  0:04 Hyunwoo Kim
  2026-05-15  2:18 ` Hyunwoo Kim
  0 siblings, 1 reply; 2+ messages in thread
From: Hyunwoo Kim @ 2026-05-15  0:04 UTC (permalink / raw)
  To: oleg, mingo, peterz, juri.lelli, vincent.guittot, torvalds, qsa,
	kees
  Cc: linux-kernel, imv4bel

task_still_dumpable() reads task->user_dumpable when task->mm is NULL.
That cache is written exactly once in exit_mm(): a single
get_dumpable(mm) load taken just before task->mm is cleared.

The load is not ordered against set_dumpable() writes performed by
CLONE_VM siblings, either via commit_creds() (any uid/gid/fsuid/fsgid
or capability transition) or via prctl(PR_SET_DUMPABLE). If a sibling
stores SUID_DUMP_DISABLE after the exiting thread observed
SUID_DUMP_USER, the live shared mm and the cached value diverge with
no later refresh, so task_still_dumpable() keeps returning the stale
SUID_DUMP_USER answer for the exiting task.

In the task->mm == NULL branch, walk the thread group under
rcu_read_lock and spin_trylock(&t->alloc_lock); use the first sibling
that still holds the shared mm and read its current get_dumpable().
Pin the mm's user_ns with get_user_ns() so it can outlive the
locked region (the actual ptrace_has_cap() may sleep). Keep
task->user_dumpable only as the fallback when no live sibling mm is
observable: a single-threaded exit, a PF_KTHREAD, or all siblings
simultaneously contended on alloc_lock.

Fixes: 31e62c2ebbfd ("ptrace: slightly saner 'get_dumpable()' logic")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
---
 kernel/ptrace.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 130043bfc209..2b2d8402b9ee 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -272,15 +272,59 @@ static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 	return ns_capable(ns, CAP_SYS_PTRACE);
 }
 
+static bool task_sibling_mm_dumpable(struct task_struct *task,
+				     unsigned int mode, bool *result)
+{
+	struct task_struct *t;
+	bool found = false;
+
+	rcu_read_lock();
+	for_each_thread(task, t) {
+		struct mm_struct *sib_mm;
+		struct user_namespace *sib_uns;
+		int sib_dumpable;
+
+		if (t == task)
+			continue;
+		if (!spin_trylock(&t->alloc_lock))
+			continue;
+		sib_mm = t->mm;
+		if (!sib_mm) {
+			spin_unlock(&t->alloc_lock);
+			continue;
+		}
+		sib_dumpable = get_dumpable(sib_mm);
+		sib_uns = get_user_ns(sib_mm->user_ns);
+		spin_unlock(&t->alloc_lock);
+
+		if (sib_dumpable == SUID_DUMP_USER)
+			*result = true;
+		else
+			*result = ptrace_has_cap(sib_uns, mode);
+		put_user_ns(sib_uns);
+		found = true;
+		break;
+	}
+	rcu_read_unlock();
+	return found;
+}
+
 static bool task_still_dumpable(struct task_struct *task, unsigned int mode)
 {
 	struct mm_struct *mm = task->mm;
+	bool sib_result;
+
 	if (mm) {
 		if (get_dumpable(mm) == SUID_DUMP_USER)
 			return true;
 		return ptrace_has_cap(mm->user_ns, mode);
 	}
 
+	/* user_dumpable can be stale; prefer a live sibling mm if any. */
+	if (!(task->flags & PF_KTHREAD) &&
+	    task_sibling_mm_dumpable(task, mode, &sib_result))
+		return sib_result;
+
 	if (task->user_dumpable)
 		return true;
 	return ptrace_has_cap(&init_user_ns, mode);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] ptrace: prefer live sibling mm over user_dumpable when task->mm is NULL
  2026-05-15  0:04 [PATCH] ptrace: prefer live sibling mm over user_dumpable when task->mm is NULL Hyunwoo Kim
@ 2026-05-15  2:18 ` Hyunwoo Kim
  0 siblings, 0 replies; 2+ messages in thread
From: Hyunwoo Kim @ 2026-05-15  2:18 UTC (permalink / raw)
  To: oleg, mingo, peterz, juri.lelli, vincent.guittot, torvalds, qsa,
	kees
  Cc: linux-kernel, imv4bel

On Fri, May 15, 2026 at 09:04:53AM +0900, Hyunwoo Kim wrote:
> task_still_dumpable() reads task->user_dumpable when task->mm is NULL.
> That cache is written exactly once in exit_mm(): a single
> get_dumpable(mm) load taken just before task->mm is cleared.
> 
> The load is not ordered against set_dumpable() writes performed by
> CLONE_VM siblings, either via commit_creds() (any uid/gid/fsuid/fsgid
> or capability transition) or via prctl(PR_SET_DUMPABLE). If a sibling
> stores SUID_DUMP_DISABLE after the exiting thread observed
> SUID_DUMP_USER, the live shared mm and the cached value diverge with
> no later refresh, so task_still_dumpable() keeps returning the stale
> SUID_DUMP_USER answer for the exiting task.
> 
> In the task->mm == NULL branch, walk the thread group under
> rcu_read_lock and spin_trylock(&t->alloc_lock); use the first sibling
> that still holds the shared mm and read its current get_dumpable().
> Pin the mm's user_ns with get_user_ns() so it can outlive the
> locked region (the actual ptrace_has_cap() may sleep). Keep
> task->user_dumpable only as the fallback when no live sibling mm is
> observable: a single-threaded exit, a PF_KTHREAD, or all siblings
> simultaneously contended on alloc_lock.
> 
> Fixes: 31e62c2ebbfd ("ptrace: slightly saner 'get_dumpable()' logic")
> Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
> ---
>  kernel/ptrace.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 44 insertions(+)
> 
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 130043bfc209..2b2d8402b9ee 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -272,15 +272,59 @@ static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
>  	return ns_capable(ns, CAP_SYS_PTRACE);
>  }
>  
> +static bool task_sibling_mm_dumpable(struct task_struct *task,
> +				     unsigned int mode, bool *result)
> +{
> +	struct task_struct *t;
> +	bool found = false;
> +
> +	rcu_read_lock();
> +	for_each_thread(task, t) {
> +		struct mm_struct *sib_mm;
> +		struct user_namespace *sib_uns;
> +		int sib_dumpable;
> +
> +		if (t == task)
> +			continue;
> +		if (!spin_trylock(&t->alloc_lock))
> +			continue;
> +		sib_mm = t->mm;
> +		if (!sib_mm) {
> +			spin_unlock(&t->alloc_lock);
> +			continue;
> +		}
> +		sib_dumpable = get_dumpable(sib_mm);
> +		sib_uns = get_user_ns(sib_mm->user_ns);
> +		spin_unlock(&t->alloc_lock);
> +
> +		if (sib_dumpable == SUID_DUMP_USER)
> +			*result = true;
> +		else
> +			*result = ptrace_has_cap(sib_uns, mode);
> +		put_user_ns(sib_uns);
> +		found = true;
> +		break;
> +	}
> +	rcu_read_unlock();
> +	return found;
> +}
> +
>  static bool task_still_dumpable(struct task_struct *task, unsigned int mode)
>  {
>  	struct mm_struct *mm = task->mm;
> +	bool sib_result;
> +
>  	if (mm) {
>  		if (get_dumpable(mm) == SUID_DUMP_USER)
>  			return true;
>  		return ptrace_has_cap(mm->user_ns, mode);
>  	}
>  
> +	/* user_dumpable can be stale; prefer a live sibling mm if any. */
> +	if (!(task->flags & PF_KTHREAD) &&
> +	    task_sibling_mm_dumpable(task, mode, &sib_result))
> +		return sib_result;
> +
>  	if (task->user_dumpable)
>  		return true;
>  	return ptrace_has_cap(&init_user_ns, mode);
> -- 
> 2.43.0
> 

To avoid confusion: a v2 patch has been submitted.

https://lore.kernel.org/all/agZ_Ug3EzCYn-Jkg@v4bel/


Best regards,
Hyunwoo Kim

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-15  2:18 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-15  0:04 [PATCH] ptrace: prefer live sibling mm over user_dumpable when task->mm is NULL Hyunwoo Kim
2026-05-15  2:18 ` Hyunwoo Kim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.