public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <longman@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>,
	Phil Auld <pauld@redhat.com>,
	Daniel Thompson <daniel.thompson@linaro.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4] sched/debug: Use sched_debug_lock to serialize use of cgroup_path[] only
Date: Tue, 6 Apr 2021 11:15:29 +0200	[thread overview]
Message-ID: <YGwmsePipAYoAI2H@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20210405234203.23526-1-longman@redhat.com>

On Mon, Apr 05, 2021 at 07:42:03PM -0400, Waiman Long wrote:
> The handling of sysrq key can be activated by echoing the key to
> /proc/sysrq-trigger or via the magic key sequence typed into a terminal
> that is connected to the system in some way (serial, USB or other means).
> In the former case, the handling is done in a user context. In the
> latter case, it is likely to be in an interrupt context.

> [ 7809.796281]  </NMI>
> [ 7809.796282]  _raw_spin_lock_irqsave+0x32/0x40
> [ 7809.796283]  print_cpu+0x261/0x7c0
> [ 7809.796283]  sysrq_sched_debug_show+0x34/0x50
> [ 7809.796284]  sysrq_handle_showstate+0xc/0x20
> [ 7809.796284]  __handle_sysrq.cold.11+0x48/0xfb
> [ 7809.796285]  write_sysrq_trigger+0x2b/0x30
> [ 7809.796285]  proc_reg_write+0x39/0x60
> [ 7809.796286]  vfs_write+0xa5/0x1a0
> [ 7809.796286]  ksys_write+0x4f/0xb0
> [ 7809.796287]  do_syscall_64+0x5b/0x1a0
> [ 7809.796287]  entry_SYSCALL_64_after_hwframe+0x65/0xca
> [ 7809.796288] RIP: 0033:0x7fabe4ceb648
> 
> The purpose of sched_debug_lock is to serialize the use of the global
> cgroup_path[] buffer in print_cpu(). The rest of the printk calls don't
> need serialization from sched_debug_lock.

> The print_cpu() function has two callers - sched_debug_show() and
> sysrq_sched_debug_show(). 

So what idiot is doing sysrq and that proc file at the same time? Why is
it a problem now?

> @@ -470,16 +468,49 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
>  #endif
>  
>  #ifdef CONFIG_CGROUP_SCHED
> +static DEFINE_SPINLOCK(sched_debug_lock);
>  static char group_path[PATH_MAX];
> +static enum {
> +	TOKEN_NONE,
> +	TOKEN_ACQUIRED,
> +	TOKEN_NA	/* Not applicable */
> +} console_token = TOKEN_ACQUIRED;

> +/*
> + * All the print_cpu() callers from sched_debug_show() will be allowed
> + * to contend for sched_debug_lock and use group_path[] as their SEQ_printf()
> + * calls will be much faster. However only one print_cpu() caller from
> + * sysrq_sched_debug_show() which outputs to the console will be allowed
> + * to use group_path[]. Another parallel console writer will have to use
> + * a shorter stack buffer instead. Since the console output will be garbled
> + * anyway, truncation of some cgroup paths shouldn't be a big issue.
> + */
> +#define SEQ_printf_task_group_path(m, tg, fmt...)			\
> +{									\
> +	unsigned long flags;						\
> +	int token = m ? TOKEN_NA					\
> +		      : xchg_acquire(&console_token, TOKEN_NONE);	\
> +									\
> +	if (token == TOKEN_NONE) {					\
> +		char buf[128];						\
> +		task_group_path(tg, buf, sizeof(buf));			\
> +		SEQ_printf(m, fmt, buf);				\
> +	} else {							\
> +		spin_lock_irqsave(&sched_debug_lock, flags);		\
> +		task_group_path(tg, group_path, sizeof(group_path));	\
> +		SEQ_printf(m, fmt, group_path);				\
> +		spin_unlock_irqrestore(&sched_debug_lock, flags);	\
> +		if (token == TOKEN_ACQUIRED)				\
> +			smp_store_release(&console_token, token);	\
> +	}								\
>  }

This is disgusting... you have an open-coded test-and-set lock-like
thing *AND* a spinlock, what gives?


What's wrong with something simple like this?

---
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4b49cc2af5c4..2ac2977f3b96 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -8,8 +8,6 @@
  */
 #include "sched.h"
 
-static DEFINE_SPINLOCK(sched_debug_lock);
-
 /*
  * This allows printing both to /proc/sched_debug and
  * to the console
@@ -470,6 +468,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED
+static DEFINE_SPINLOCK(group_path_lock);
 static char group_path[PATH_MAX];
 
 static char *task_group_path(struct task_group *tg)
@@ -481,6 +480,22 @@ static char *task_group_path(struct task_group *tg)
 
 	return group_path;
 }
+
+#define SEQ_printf_task_group_path(m, tg)				\
+do {									\
+	if (spin_trylock(&group_path_lock)) {				\
+		task_group_path(tg, group_path, sizeof(group_path));	\
+		SEQ_printf(m, "%s", group_path);			\
+		spin_unlock(&group_path_lock);				\
+	} else {							\
+		SEQ_printf(m, "looser!");				\
+	}
+} while (0)
+
+#else
+
+#define SEQ_printf_task_group_path(m, tg) do { } while (0)
+
 #endif
 
 static void
@@ -505,9 +520,8 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
-#ifdef CONFIG_CGROUP_SCHED
-	SEQ_printf(m, " %s", task_group_path(task_group(p)));
-#endif
+	SEQ_printf(m, " ");
+	SEQ_printf_task_group_path(m, task_group(p));
 
 	SEQ_printf(m, "\n");
 }
@@ -541,13 +555,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	struct sched_entity *last;
 	unsigned long flags;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
-#else
+	SEQ_printf(m, "cfs_rq[%d]:", cpu);
+	SEQ_printf_task_group_path(m, cfs_rq->tg);
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
-#endif
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -612,13 +623,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
-#ifdef CONFIG_RT_GROUP_SCHED
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
-#else
+	SEQ_printf(m, "rt_rq[%d]:", cpu);
+	SEQ_printf_task_group_path(m, rt_rq->tg);
 	SEQ_printf(m, "\n");
-	SEQ_printf(m, "rt_rq[%d]:\n", cpu);
-#endif
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -666,7 +674,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 static void print_cpu(struct seq_file *m, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
 
 #ifdef CONFIG_X86
 	{
@@ -717,13 +724,11 @@ do {									\
 	}
 #undef P
 
-	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 	print_dl_stats(m, cpu);
 
 	print_rq(m, rq, cpu);
-	spin_unlock_irqrestore(&sched_debug_lock, flags);
 	SEQ_printf(m, "\n");
 }
 

  parent reply	other threads:[~2021-04-06  9:17 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05 23:42 [PATCH v4] sched/debug: Use sched_debug_lock to serialize use of cgroup_path[] only Waiman Long
2021-04-06  0:18 ` Steven Rostedt
2021-04-06  1:57   ` Waiman Long
2021-04-06  9:15 ` Peter Zijlstra [this message]
2021-04-06 15:17   ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YGwmsePipAYoAI2H@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=daniel.thompson@linaro.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=pauld@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox