From: Thomas Gleixner <tglx@kernel.org>
To: Matthieu Baerts <matttbe@kernel.org>
Cc: "Peter Zijlstra" <peterz@infradead.org>,
"Jiri Slaby" <jirislaby@kernel.org>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
"Stefano Garzarella" <sgarzare@redhat.com>,
kvm@vger.kernel.org, virtualization@lists.linux.dev,
Netdev <netdev@vger.kernel.org>,
rcu@vger.kernel.org, "MPTCP Linux" <mptcp@lists.linux.dev>,
"Linux Kernel" <linux-kernel@vger.kernel.org>,
"Shinichiro Kawasaki" <shinichiro.kawasaki@wdc.com>,
"Paul E. McKenney" <paulmck@kernel.org>,
"Dave Hansen" <dave.hansen@linux.intel.com>,
luto@kernel.org, "Michal Koutný" <MKoutny@suse.com>,
"Waiman Long" <longman@redhat.com>,
"Marco Elver" <elver@google.com>
Subject: Re: Stalls when starting a VSOCK listening socket: soft lockups, RCU stalls, timeout
Date: Mon, 09 Mar 2026 09:43:48 +0100 [thread overview]
Message-ID: <87pl5ds88r.ffs@tglx> (raw)
In-Reply-To: <57c1e171-9520-4288-9e2d-10a72a499968@kernel.org>
On Sun, Mar 08 2026 at 18:23, Matthieu Baerts wrote:
> 08 Mar 2026 17:58:26 Thomas Gleixner <tglx@kernel.org>:
>> So I'm back to square one. I go and do what I should have done in the
>> first place. Write a debug patch with trace_printks and let the people
>> who can actually trigger the problem run with it.
>
> Happy to test such debug patches!
See below.
Enable the tracepoints either on the kernel command line:
trace_event=sched_switch,mmcid:*
or before starting the test case:
echo 1 >/sys/kernel/tracing/events/sched/sched_switch/enable
echo 1 >/sys/kernel/tracing/events/mmcid/enable
I added a 50ms timeout into mm_get_cid() which freezes the trace and
emits a warning. If you enable panic_on_warn and ftrace_dump_on_oops,
then it dumps the trace buffer once it hits the warning.
Either kernel command line:
panic_on_warn=1 ftrace_dump_on_oops
or
echo 1 >/proc/sys/kernel/panic_on_warn
echo 1 >/proc/sys/kernel/ftrace_dump_on_oops
That should provide enough information to decode this mystery.
Thanks,
tglx
---
include/trace/events/mmcid.h | 138 +++++++++++++++++++++++++++++++++++++++++++
kernel/sched/core.c | 10 +++
kernel/sched/sched.h | 20 +++++-
3 files changed, 165 insertions(+), 3 deletions(-)
--- /dev/null
+++ b/include/trace/events/mmcid.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmcid
+
+#if !defined(_TRACE_MMCID_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMCID_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mmcid_class,
+
+ TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(mm, cid),
+
+ TP_STRUCT__entry(
+ __field( void *, mm )
+ __field( unsigned int, cid )
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->cid = cid;
+ ),
+
+ TP_printk("mm=%p cid=%08x", __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_getcid,
+
+ TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(mm, cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_putcid,
+
+ TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_task_class,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(t, mm, cid),
+
+ TP_STRUCT__entry(
+ __field( void *, t )
+ __field( void *, mm )
+ __field( unsigned int, cid )
+ ),
+
+ TP_fast_assign(
+ __entry->t = t;
+ __entry->mm = mm;
+ __entry->cid = cid;
+ ),
+
+ TP_printk("t=%p mm=%p cid=%08x", __entry->t, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_task_class, mmcid_task_update,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(t, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_cpu_class,
+
+ TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(cpu, mm, cid),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, cpu )
+ __field( void *, mm )
+ __field( unsigned int, cid )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->mm = mm;
+ __entry->cid = cid;
+ ),
+
+ TP_printk("cpu=%u mm=%p cid=%08x", __entry->cpu, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_cpu_class, mmcid_cpu_update,
+
+ TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(cpu, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_user_class,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+ TP_ARGS(t, mm),
+
+ TP_STRUCT__entry(
+ __field( void *, t )
+ __field( void *, mm )
+ __field( unsigned int, users )
+ ),
+
+ TP_fast_assign(
+ __entry->t = t;
+ __entry->mm = mm;
+ __entry->users = mm->mm_cid.users;
+ ),
+
+ TP_printk("t=%p mm=%p users=%u", __entry->t, __entry->mm, __entry->users)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_add,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+ TP_ARGS(t, mm)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_del,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+ TP_ARGS(t, mm)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -86,6 +86,7 @@
#include <linux/sched/rseq_api.h>
#include <trace/events/sched.h>
#include <trace/events/ipi.h>
+#include <trace/events/mmcid.h>
#undef CREATE_TRACE_POINTS
#include "sched.h"
@@ -10569,7 +10570,9 @@ static inline void mm_cid_transit_to_tas
unsigned int cid = cpu_cid_to_cid(t->mm_cid.cid);
t->mm_cid.cid = cid_to_transit_cid(cid);
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
pcp->cid = t->mm_cid.cid;
+ trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
}
}
@@ -10602,7 +10605,9 @@ static void mm_cid_fixup_cpus_to_tasks(s
if (!cid_in_transit(cid)) {
cid = cid_to_transit_cid(cid);
rq->curr->mm_cid.cid = cid;
+ trace_mmcid_task_update(rq->curr, rq->curr->mm, cid);
pcp->cid = cid;
+ trace_mmcid_cpu_update(cpu, mm, cid);
}
}
}
@@ -10613,7 +10618,9 @@ static inline void mm_cid_transit_to_cpu
{
if (cid_on_task(t->mm_cid.cid)) {
t->mm_cid.cid = cid_to_transit_cid(t->mm_cid.cid);
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
pcp->cid = t->mm_cid.cid;
+ trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
}
}
@@ -10685,6 +10692,7 @@ static bool sched_mm_cid_add_user(struct
{
t->mm_cid.active = 1;
mm->mm_cid.users++;
+ trace_mmcid_user_add(t, mm);
return mm_update_max_cids(mm);
}
@@ -10727,6 +10735,7 @@ void sched_mm_cid_fork(struct task_struc
} else {
mm_cid_fixup_cpus_to_tasks(mm);
t->mm_cid.cid = mm_get_cid(mm);
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
}
}
@@ -10739,6 +10748,7 @@ static bool sched_mm_cid_remove_user(str
mm_unset_cid_on_task(t);
}
t->mm->mm_cid.users--;
+ trace_mmcid_user_del(t, t->mm);
return mm_update_max_cids(t->mm);
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -75,6 +75,7 @@
#include <linux/delayacct.h>
#include <linux/mmu_context.h>
+#include <trace/events/mmcid.h>
#include <trace/events/power.h>
#include <trace/events/sched.h>
@@ -3809,6 +3810,7 @@ static __always_inline bool cid_on_task(
static __always_inline void mm_drop_cid(struct mm_struct *mm, unsigned int cid)
{
+ trace_mmcid_putcid(mm, cid);
clear_bit(cid, mm_cidmask(mm));
}
@@ -3817,6 +3819,7 @@ static __always_inline void mm_unset_cid
unsigned int cid = t->mm_cid.cid;
t->mm_cid.cid = MM_CID_UNSET;
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
if (cid_on_task(cid))
mm_drop_cid(t->mm, cid);
}
@@ -3838,6 +3841,7 @@ static inline unsigned int __mm_get_cid(
return MM_CID_UNSET;
if (test_and_set_bit(cid, mm_cidmask(mm)))
return MM_CID_UNSET;
+ trace_mmcid_getcid(mm, cid);
return cid;
}
@@ -3845,9 +3849,17 @@ static inline unsigned int mm_get_cid(st
{
unsigned int cid = __mm_get_cid(mm, READ_ONCE(mm->mm_cid.max_cids));
- while (cid == MM_CID_UNSET) {
- cpu_relax();
- cid = __mm_get_cid(mm, num_possible_cpus());
+ if (cid == MM_CID_UNSET) {
+ ktime_t t0 = ktime_get();
+
+ while (cid == MM_CID_UNSET) {
+ cpu_relax();
+ cid = __mm_get_cid(mm, num_possible_cpus());
+ if (ktime_get() - t0 > 50 * NSEC_PER_MSEC) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+ }
}
return cid;
}
@@ -3874,6 +3886,7 @@ static inline unsigned int mm_cid_conver
static __always_inline void mm_cid_update_task_cid(struct task_struct *t, unsigned int cid)
{
if (t->mm_cid.cid != cid) {
+ trace_mmcid_task_update(t, t->mm, cid);
t->mm_cid.cid = cid;
rseq_sched_set_ids_changed(t);
}
@@ -3881,6 +3894,7 @@ static __always_inline void mm_cid_updat
static __always_inline void mm_cid_update_pcpu_cid(struct mm_struct *mm, unsigned int cid)
{
+ trace_mmcid_cpu_update(smp_processor_id(), mm, cid);
__this_cpu_write(mm->mm_cid.pcpu->cid, cid);
}
next prev parent reply other threads:[~2026-03-09 8:43 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-06 11:54 Stalls when starting a VSOCK listening socket: soft lockups, RCU stalls, timeout Matthieu Baerts
2026-02-06 16:38 ` Stefano Garzarella
2026-02-06 17:13 ` Matthieu Baerts
2026-02-26 10:37 ` Jiri Slaby
2026-03-02 5:28 ` Jiri Slaby
2026-03-02 11:46 ` Peter Zijlstra
2026-03-02 14:30 ` Waiman Long
2026-03-05 7:00 ` Jiri Slaby
2026-03-05 11:53 ` Jiri Slaby
2026-03-05 12:20 ` Jiri Slaby
2026-03-05 16:16 ` Thomas Gleixner
2026-03-05 17:33 ` Jiri Slaby
2026-03-05 19:25 ` Thomas Gleixner
2026-03-06 5:48 ` Jiri Slaby
2026-03-06 9:57 ` Thomas Gleixner
2026-03-06 10:16 ` Jiri Slaby
2026-03-06 16:28 ` Thomas Gleixner
2026-03-06 11:06 ` Matthieu Baerts
2026-03-06 16:57 ` Matthieu Baerts
2026-03-06 18:31 ` Jiri Slaby
2026-03-06 18:44 ` Matthieu Baerts
2026-03-06 21:40 ` Matthieu Baerts
2026-03-06 15:24 ` Peter Zijlstra
2026-03-07 9:01 ` Thomas Gleixner
2026-03-07 22:29 ` Thomas Gleixner
2026-03-08 9:15 ` Thomas Gleixner
2026-03-08 16:55 ` Jiri Slaby
2026-03-08 16:58 ` Thomas Gleixner
2026-03-08 17:23 ` Matthieu Baerts
2026-03-09 8:43 ` Thomas Gleixner [this message]
2026-03-09 12:23 ` Matthieu Baerts
2026-03-10 8:09 ` Thomas Gleixner
2026-03-10 8:20 ` Thomas Gleixner
2026-03-10 8:56 ` Jiri Slaby
2026-03-10 9:00 ` Jiri Slaby
2026-03-10 10:03 ` Thomas Gleixner
2026-03-10 10:06 ` Thomas Gleixner
2026-03-10 11:24 ` Matthieu Baerts
2026-03-10 11:54 ` Peter Zijlstra
2026-03-10 12:28 ` Thomas Gleixner
2026-03-10 13:40 ` Matthieu Baerts
2026-03-10 13:47 ` Thomas Gleixner
2026-03-10 15:51 ` Matthieu Baerts
2026-03-03 13:23 ` Matthieu Baerts
2026-03-05 6:46 ` Jiri Slaby
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87pl5ds88r.ffs@tglx \
--to=tglx@kernel.org \
--cc=MKoutny@suse.com \
--cc=dave.hansen@linux.intel.com \
--cc=elver@google.com \
--cc=jirislaby@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longman@redhat.com \
--cc=luto@kernel.org \
--cc=matttbe@kernel.org \
--cc=mptcp@lists.linux.dev \
--cc=netdev@vger.kernel.org \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=rcu@vger.kernel.org \
--cc=sgarzare@redhat.com \
--cc=shinichiro.kawasaki@wdc.com \
--cc=stefanha@redhat.com \
--cc=virtualization@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.