public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@kernel.org>
To: Matthieu Baerts <matttbe@kernel.org>
Cc: "Peter Zijlstra" <peterz@infradead.org>,
	"Jiri Slaby" <jirislaby@kernel.org>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Stefano Garzarella" <sgarzare@redhat.com>,
	kvm@vger.kernel.org, virtualization@lists.linux.dev,
	Netdev <netdev@vger.kernel.org>,
	rcu@vger.kernel.org, "MPTCP Linux" <mptcp@lists.linux.dev>,
	"Linux Kernel" <linux-kernel@vger.kernel.org>,
	"Shinichiro Kawasaki" <shinichiro.kawasaki@wdc.com>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	"Dave Hansen" <dave.hansen@linux.intel.com>,
	luto@kernel.org, "Michal Koutný" <MKoutny@suse.com>,
	"Waiman Long" <longman@redhat.com>,
	"Marco Elver" <elver@google.com>
Subject: Re: Stalls when starting a VSOCK listening socket: soft lockups, RCU stalls, timeout
Date: Mon, 09 Mar 2026 09:43:48 +0100	[thread overview]
Message-ID: <87pl5ds88r.ffs@tglx> (raw)
In-Reply-To: <57c1e171-9520-4288-9e2d-10a72a499968@kernel.org>

On Sun, Mar 08 2026 at 18:23, Matthieu Baerts wrote:
> 08 Mar 2026 17:58:26 Thomas Gleixner <tglx@kernel.org>:
>> So I'm back to square one. I go and do what I should have done in the
>> first place. Write a debug patch with trace_printks and let the people
>> who can actually trigger the problem run with it.
>
> Happy to test such debug patches!

See below.

Enable the tracepoints either on the kernel command line:

    trace_event=sched_switch,mmcid:*

or before starting the test case:

    echo 1 >/sys/kernel/tracing/events/sched/sched_switch/enable
    echo 1 >/sys/kernel/tracing/events/mmcid/enable

I added a 50ms timeout to mm_get_cid() which freezes the trace and
emits a warning. If you enable panic_on_warn and ftrace_dump_on_oops,
then it dumps the trace buffer once it hits the warning.

Either on the kernel command line:

   panic_on_warn ftrace_dump_on_oops

or

  echo 1 >/proc/sys/kernel/panic_on_warn
  echo 1 >/proc/sys/kernel/ftrace_dump_on_oops

That should provide enough information to decode this mystery.

Thanks,

        tglx
---
 include/trace/events/mmcid.h |  138 +++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c          |   10 +++
 kernel/sched/sched.h         |   20 +++++-
 3 files changed, 165 insertions(+), 3 deletions(-)

--- /dev/null
+++ b/include/trace/events/mmcid.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmcid
+
+#if !defined(_TRACE_MMCID_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMCID_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mmcid_class,
+
+	TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(mm, cid),
+
+	TP_STRUCT__entry(
+		__field( void *,	mm	)
+		__field( unsigned int,	cid	)
+	),
+
+	TP_fast_assign(
+		__entry->mm	= mm;
+		__entry->cid	= cid;
+	),
+
+	TP_printk("mm=%p cid=%08x", __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_getcid,
+
+	TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(mm, cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_putcid,
+
+	TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_task_class,
+
+	TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(t, mm, cid),
+
+	TP_STRUCT__entry(
+		__field( void *,	t	)
+		__field( void *,	mm	)
+		__field( unsigned int,	cid	)
+	),
+
+	TP_fast_assign(
+		__entry->t	= t;
+		__entry->mm	= mm;
+		__entry->cid	= cid;
+	),
+
+	TP_printk("t=%p mm=%p cid=%08x", __entry->t, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_task_class, mmcid_task_update,
+
+	TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(t, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_cpu_class,
+
+	TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(cpu, mm, cid),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	cpu	)
+		__field( void *,	mm	)
+		__field( unsigned int,	cid	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->mm	= mm;
+		__entry->cid	= cid;
+	),
+
+	TP_printk("cpu=%u mm=%p cid=%08x", __entry->cpu, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_cpu_class, mmcid_cpu_update,
+
+	TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+	TP_ARGS(cpu, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_user_class,
+
+	TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+	TP_ARGS(t, mm),
+
+	TP_STRUCT__entry(
+		__field( void *,	t	)
+		__field( void *,	mm	)
+		__field( unsigned int,	users	)
+	),
+
+	TP_fast_assign(
+		__entry->t	= t;
+		__entry->mm	= mm;
+		__entry->users	= mm->mm_cid.users;
+	),
+
+	TP_printk("t=%p mm=%p users=%u", __entry->t, __entry->mm, __entry->users)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_add,
+
+	TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+	TP_ARGS(t, mm)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_del,
+
+	TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+	     TP_ARGS(t, mm)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -86,6 +86,7 @@
 #include <linux/sched/rseq_api.h>
 #include <trace/events/sched.h>
 #include <trace/events/ipi.h>
+#include <trace/events/mmcid.h>
 #undef CREATE_TRACE_POINTS
 
 #include "sched.h"
@@ -10569,7 +10570,9 @@ static inline void mm_cid_transit_to_tas
 		unsigned int cid = cpu_cid_to_cid(t->mm_cid.cid);
 
 		t->mm_cid.cid = cid_to_transit_cid(cid);
+		trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
 		pcp->cid = t->mm_cid.cid;
+		trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
 	}
 }
 
@@ -10602,7 +10605,9 @@ static void mm_cid_fixup_cpus_to_tasks(s
 			if (!cid_in_transit(cid)) {
 				cid = cid_to_transit_cid(cid);
 				rq->curr->mm_cid.cid = cid;
+				trace_mmcid_task_update(rq->curr, rq->curr->mm, cid);
 				pcp->cid = cid;
+				trace_mmcid_cpu_update(cpu, mm, cid);
 			}
 		}
 	}
@@ -10613,7 +10618,9 @@ static inline void mm_cid_transit_to_cpu
 {
 	if (cid_on_task(t->mm_cid.cid)) {
 		t->mm_cid.cid = cid_to_transit_cid(t->mm_cid.cid);
+		trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
 		pcp->cid = t->mm_cid.cid;
+		trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
 	}
 }
 
@@ -10685,6 +10692,7 @@ static bool sched_mm_cid_add_user(struct
 {
 	t->mm_cid.active = 1;
 	mm->mm_cid.users++;
+	trace_mmcid_user_add(t, mm);
 	return mm_update_max_cids(mm);
 }
 
@@ -10727,6 +10735,7 @@ void sched_mm_cid_fork(struct task_struc
 	} else {
 		mm_cid_fixup_cpus_to_tasks(mm);
 		t->mm_cid.cid = mm_get_cid(mm);
+		trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
 	}
 }
 
@@ -10739,6 +10748,7 @@ static bool sched_mm_cid_remove_user(str
 		mm_unset_cid_on_task(t);
 	}
 	t->mm->mm_cid.users--;
+	trace_mmcid_user_del(t, t->mm);
 	return mm_update_max_cids(t->mm);
 }
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -75,6 +75,7 @@
 #include <linux/delayacct.h>
 #include <linux/mmu_context.h>
 
+#include <trace/events/mmcid.h>
 #include <trace/events/power.h>
 #include <trace/events/sched.h>
 
@@ -3809,6 +3810,7 @@ static __always_inline bool cid_on_task(
 
 static __always_inline void mm_drop_cid(struct mm_struct *mm, unsigned int cid)
 {
+	trace_mmcid_putcid(mm, cid);
 	clear_bit(cid, mm_cidmask(mm));
 }
 
@@ -3817,6 +3819,7 @@ static __always_inline void mm_unset_cid
 	unsigned int cid = t->mm_cid.cid;
 
 	t->mm_cid.cid = MM_CID_UNSET;
+	trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
 	if (cid_on_task(cid))
 		mm_drop_cid(t->mm, cid);
 }
@@ -3838,6 +3841,7 @@ static inline unsigned int __mm_get_cid(
 		return MM_CID_UNSET;
 	if (test_and_set_bit(cid, mm_cidmask(mm)))
 		return MM_CID_UNSET;
+	trace_mmcid_getcid(mm, cid);
 	return cid;
 }
 
@@ -3845,9 +3849,17 @@ static inline unsigned int mm_get_cid(st
 {
 	unsigned int cid = __mm_get_cid(mm, READ_ONCE(mm->mm_cid.max_cids));
 
-	while (cid == MM_CID_UNSET) {
-		cpu_relax();
-		cid = __mm_get_cid(mm, num_possible_cpus());
+	if (cid == MM_CID_UNSET) {
+		ktime_t t0 = ktime_get();
+
+		while (cid == MM_CID_UNSET) {
+			cpu_relax();
+			cid = __mm_get_cid(mm, num_possible_cpus());
+			if (ktime_get() - t0 > 50 * NSEC_PER_MSEC) {
+				tracing_off();
+				WARN_ON_ONCE(1);
+			}
+		}
 	}
 	return cid;
 }
@@ -3874,6 +3886,7 @@ static inline unsigned int mm_cid_conver
 static __always_inline void mm_cid_update_task_cid(struct task_struct *t, unsigned int cid)
 {
 	if (t->mm_cid.cid != cid) {
+		trace_mmcid_task_update(t, t->mm, cid);
 		t->mm_cid.cid = cid;
 		rseq_sched_set_ids_changed(t);
 	}
@@ -3881,6 +3894,7 @@ static __always_inline void mm_cid_updat
 
 static __always_inline void mm_cid_update_pcpu_cid(struct mm_struct *mm, unsigned int cid)
 {
+	trace_mmcid_cpu_update(smp_processor_id(), mm, cid);
 	__this_cpu_write(mm->mm_cid.pcpu->cid, cid);
 }
 

  reply	other threads:[~2026-03-09  8:43 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-06 11:54 Stalls when starting a VSOCK listening socket: soft lockups, RCU stalls, timeout Matthieu Baerts
2026-02-06 16:38 ` Stefano Garzarella
2026-02-06 17:13   ` Matthieu Baerts
2026-02-26 10:37 ` Jiri Slaby
2026-03-02  5:28   ` Jiri Slaby
2026-03-02 11:46     ` Peter Zijlstra
2026-03-02 14:30       ` Waiman Long
2026-03-05  7:00       ` Jiri Slaby
2026-03-05 11:53         ` Jiri Slaby
2026-03-05 12:20           ` Jiri Slaby
2026-03-05 16:16             ` Thomas Gleixner
2026-03-05 17:33               ` Jiri Slaby
2026-03-05 19:25                 ` Thomas Gleixner
2026-03-06  5:48                   ` Jiri Slaby
2026-03-06  9:57                     ` Thomas Gleixner
2026-03-06 10:16                       ` Jiri Slaby
2026-03-06 16:28                         ` Thomas Gleixner
2026-03-06 11:06                       ` Matthieu Baerts
2026-03-06 16:57                         ` Matthieu Baerts
2026-03-06 18:31                           ` Jiri Slaby
2026-03-06 18:44                             ` Matthieu Baerts
2026-03-06 21:40                           ` Matthieu Baerts
2026-03-06 15:24                       ` Peter Zijlstra
2026-03-07  9:01                         ` Thomas Gleixner
2026-03-07 22:29                           ` Thomas Gleixner
2026-03-08  9:15                             ` Thomas Gleixner
2026-03-08 16:55                               ` Jiri Slaby
2026-03-08 16:58                               ` Thomas Gleixner
2026-03-08 17:23                                 ` Matthieu Baerts
2026-03-09  8:43                                   ` Thomas Gleixner [this message]
2026-03-09 12:23                                     ` Matthieu Baerts
2026-03-10  8:09                                       ` Thomas Gleixner
2026-03-10  8:20                                         ` Thomas Gleixner
2026-03-10  8:56                                         ` Jiri Slaby
2026-03-10  9:00                                           ` Jiri Slaby
2026-03-10 10:03                                             ` Thomas Gleixner
2026-03-10 10:06                                               ` Thomas Gleixner
2026-03-10 11:24                                                 ` Matthieu Baerts
2026-03-10 11:54                                                   ` Peter Zijlstra
2026-03-10 12:28                                                     ` Thomas Gleixner
2026-03-10 13:40                                                       ` Matthieu Baerts
2026-03-10 13:47                                                         ` Thomas Gleixner
2026-03-10 15:51                                                           ` Matthieu Baerts
2026-03-03 13:23   ` Matthieu Baerts
2026-03-05  6:46     ` Jiri Slaby

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87pl5ds88r.ffs@tglx \
    --to=tglx@kernel.org \
    --cc=MKoutny@suse.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=elver@google.com \
    --cc=jirislaby@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=luto@kernel.org \
    --cc=matttbe@kernel.org \
    --cc=mptcp@lists.linux.dev \
    --cc=netdev@vger.kernel.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rcu@vger.kernel.org \
    --cc=sgarzare@redhat.com \
    --cc=shinichiro.kawasaki@wdc.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox