public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm-devel <kvm@vger.kernel.org>, linux-kernel@vger.kernel.org
Cc: Karen Noel <knoel@redhat.com>, Rik van Riel <riel@redhat.com>,
	Don Zickus <dzickus@redhat.com>,
	Prarit Bhargava <prarit@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: watchdog: print stolen time increment at softlockup detection
Date: Thu, 27 Jun 2013 23:57:23 -0300	[thread overview]
Message-ID: <20130628025723.GA12719@amt.cnet> (raw)


One possible cause of a softlockup report in a Linux VM is that the host
system is overcommitted to the point where the watchdog task is unable
to make progress (unable to touch the watchdog).

Maintain the increment in stolen time over the softlockup detection
period (20 seconds by default), and report this increment in the
softlockup message.

Overcommitment is then indicated by a large stolen time increment,
accounting for more than, or for a significant percentage of, the
softlockup threshold.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 05039e3..ed09d58 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -34,6 +34,8 @@ int __read_mostly watchdog_thresh = 10;
 static int __read_mostly watchdog_disabled;
 static u64 __read_mostly sample_period;
 
+#define SOFT_INTRS_PER_PERIOD 5
+
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
@@ -127,9 +129,51 @@ static void set_sample_period(void)
 	 * and hard thresholds) to increment before the
 	 * hardlockup detector generates a warning
 	 */
-	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+	sample_period = get_softlockup_thresh() *
+			((u64)NSEC_PER_SEC / SOFT_INTRS_PER_PERIOD);
 }
 
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct steal_clock_record {
+	u64 prev_stolen_time;	/* steal clock value seen at the last sample */
+	u64 stolen_time[SOFT_INTRS_PER_PERIOD];	/* per-sample steal deltas */
+	int idx;	/* next stolen_time[] slot to fill; wraps back to 0 */
+};
+
+static DEFINE_PER_CPU(struct steal_clock_record, steal_record);
+static void record_steal_time(void)
+{
+	struct steal_clock_record *r;
+	int cpu = smp_processor_id();	/* sample the CPU we are running on */
+	u64 steal_time;
+	r = &per_cpu(steal_record, cpu);
+
+	steal_time = paravirt_steal_clock(cpu);
+	r->stolen_time[r->idx] = steal_time - r->prev_stolen_time;
+	r->idx++;
+	if (r->idx == SOFT_INTRS_PER_PERIOD)
+		r->idx = 0;	/* ring is full: overwrite the oldest slot next */
+	r->prev_stolen_time = steal_time;	/* baseline for the next delta */
+}
+
+static unsigned int get_accumulated_steal(int cpu)
+{
+	int idx;
+	u64 t = 0;	/* sum of every steal delta recorded for this cpu */
+	struct steal_clock_record *r = &per_cpu(steal_record, cpu);
+
+	for (idx = 0; idx < SOFT_INTRS_PER_PERIOD; idx++)
+		t += r->stolen_time[idx];
+
+	do_div(t, 1000000);	/* in-place divide; caller prints result as ms */
+
+	return t;	/* u64 is narrowed to the unsigned int return type */
+}
+
+#else
+static void record_steal_time(void) { return; }	/* no-op stub */
+#endif
+
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
@@ -271,6 +315,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
 
+	/* record steal time */
+	record_steal_time();
+
 	/* kick the softlockup detector */
 	wake_up_process(__this_cpu_read(softlockup_watchdog));
 
@@ -316,6 +363,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+		printk(KERN_EMERG "soft lockup stolen time = %ums\n",
+			get_accumulated_steal(smp_processor_id()));
+#endif
 		print_modules();
 		print_irqtrace_events(current);
 		if (regs)

             reply	other threads:[~2013-06-28  2:58 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-28  2:57 Marcelo Tosatti [this message]
2013-06-28  8:34 ` watchdog: print stolen time increment at softlockup detection Paolo Bonzini
2013-06-28 14:12 ` Don Zickus
2013-06-28 20:37   ` Marcelo Tosatti
2013-07-03 16:44     ` Don Zickus
2013-07-04  2:15       ` Marcelo Tosatti
2013-07-04  2:32       ` Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130628025723.GA12719@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=dzickus@redhat.com \
    --cc=knoel@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=prarit@redhat.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox