All of lore.kernel.org
 help / color / mirror / Atom feed
From: William Lee Irwin III <wli@holomorphy.com>
To: Andrew Morton <akpm@osdl.org>, Con Kolivas <kernel@kolivas.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] Instrumenting high latency
Date: Mon, 12 Jul 2004 04:31:14 -0700	[thread overview]
Message-ID: <20040712113114.GA21066@holomorphy.com> (raw)
In-Reply-To: <20040712083820.GW21066@holomorphy.com>

On Mon, Jul 12, 2004 at 01:02:59AM -0700, William Lee Irwin III wrote:
>> I suspect it's better to drop in hooks to trap those e.g. in
>> cond_resched() and cond_resched_lock().

On Mon, Jul 12, 2004 at 01:38:20AM -0700, William Lee Irwin III wrote:
> Something like this should do, modulo everything outside kernel/ and mm/.

Okay, finally, it should be possible to genericize this all, since
sched_clock() is an arch-independent interface for high-precision
timings in units of nanoseconds.

Some cleanups and fixing a minor oversight in touch_preempt_timing(),
which was that in principle things can break out of preempt_count() > 1
after checking need_resched(). Cleaner, smaller, yes. How crippled it
just got with the various potentially false negatives introduced by
using need_resched() as a hook for touch_preempt_timing() is unclear.

Also, the major oversight of nopping out inc_preempt_count() and
dec_preempt_count(), which will take out boxen, is corrected here.
Happily, some underscore-itis is also removed.

I couldn't think of a better place to put the config option for generic
use than under CONFIG_EMBEDDED, otherwise I'd have to drop it in
alongside CONFIG_PREEMPT on all arches, as it's basically more generic
than CONFIG_PREEMPT in and of itself.

I guess the other things are:
(a) do people want this sysctl-configurable instead of boot-time?
	I chose boot-time because sysctls seem to introduce more risk
	of people shooting themselves in the foot.
(b) the damn thing finally boots on my craptop, where else did it break?
(c) everything else and the kitchen sink


-- wli

Index: timing-2.6.7/include/linux/preempt.h
===================================================================
--- timing-2.6.7.orig/include/linux/preempt.h	2004-06-15 22:19:02.000000000 -0700
+++ timing-2.6.7/include/linux/preempt.h	2004-07-12 03:18:19.000000000 -0700
@@ -9,17 +9,17 @@
 #include <linux/config.h>
 #include <linux/linkage.h>
 
-#define preempt_count()	(current_thread_info()->preempt_count)
-
-#define inc_preempt_count() \
-do { \
-	preempt_count()++; \
-} while (0)
+#ifdef CONFIG_PREEMPT_TIMING
+void inc_preempt_count(void);
+void dec_preempt_count(void);
+void touch_preempt_timing(void);
+#else
+#define touch_preempt_timing()	do { } while (0)
+#define inc_preempt_count()	do { preempt_count()++; } while (0)
+#define dec_preempt_count()	do { preempt_count()--; } while (0)
+#endif
 
-#define dec_preempt_count() \
-do { \
-	preempt_count()--; \
-} while (0)
+#define preempt_count()	(current_thread_info()->preempt_count)
 
 #ifdef CONFIG_PREEMPT
 
@@ -51,9 +51,15 @@
 
 #else
 
+#ifdef CONFIG_PREEMPT_TIMING
+#define preempt_disable()		inc_preempt_count()
+#define preempt_enable_no_resched()	dec_preempt_count()
+#define preempt_enable()		dec_preempt_count()
+#else
 #define preempt_disable()		do { } while (0)
 #define preempt_enable_no_resched()	do { } while (0)
 #define preempt_enable()		do { } while (0)
+#endif
 #define preempt_check_resched()		do { } while (0)
 
 #endif
Index: timing-2.6.7/include/linux/sched.h
===================================================================
--- timing-2.6.7.orig/include/linux/sched.h	2004-06-15 22:18:57.000000000 -0700
+++ timing-2.6.7/include/linux/sched.h	2004-07-12 01:45:08.000000000 -0700
@@ -1000,7 +1000,8 @@
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }
   
-static inline int need_resched(void)
+#define need_resched()	({ touch_preempt_timing(); __need_resched(); })
+static inline int __need_resched(void)
 {
 	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
 }
Index: timing-2.6.7/init/Kconfig
===================================================================
--- timing-2.6.7.orig/init/Kconfig	2004-06-15 22:19:52.000000000 -0700
+++ timing-2.6.7/init/Kconfig	2004-07-12 03:55:15.000000000 -0700
@@ -261,6 +261,14 @@
 	  Disabling this option will cause the kernel to be built without
 	  support for epoll family of system calls.
 
+config PREEMPT_TIMING
+	bool "Non-preemptible critical section timing"
+	default n
+	help
+	  This option measures the time spent in non-preemptible critical
+	  sections and reports warnings when a boot-time configurable
+	  latency threshold is exceeded.
+
 source "drivers/block/Kconfig.iosched"
 
 config CC_OPTIMIZE_FOR_SIZE
Index: timing-2.6.7/kernel/sched.c
===================================================================
--- timing-2.6.7.orig/kernel/sched.c	2004-06-15 22:19:51.000000000 -0700
+++ timing-2.6.7/kernel/sched.c	2004-07-12 03:14:41.000000000 -0700
@@ -4029,3 +4029,74 @@
 
 EXPORT_SYMBOL(__preempt_write_lock);
 #endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */
+
+#ifdef CONFIG_PREEMPT_TIMING
+#include <linux/kallsyms.h>
+
+static int preempt_thresh;
+static DEFINE_PER_CPU(u64, preempt_timings);
+static DEFINE_PER_CPU(unsigned long, preempt_entry);
+
+static int setup_preempt_thresh(char *s)
+{
+	int thresh;
+
+	get_option(&s, &thresh);
+	if (thresh > 0) {
+		preempt_thresh = thresh;
+		printk("Preemption threshold = %dms\n", preempt_thresh);
+	}
+	return 1;
+}
+__setup("preempt_thresh=", setup_preempt_thresh);
+
+static void __touch_preempt_timing(void *addr)
+{
+	__get_cpu_var(preempt_timings) = sched_clock();
+	__get_cpu_var(preempt_entry) = (unsigned long)addr;
+}
+
+void touch_preempt_timing(void)
+{
+	if (preempt_count() > 0 && system_state == SYSTEM_RUNNING &&
+						__get_cpu_var(preempt_entry))
+		__touch_preempt_timing(__builtin_return_address(0));
+}
+EXPORT_SYMBOL(touch_preempt_timing);
+
+void inc_preempt_count(void)
+{
+	preempt_count()++;
+	if (preempt_count() == 1 && system_state == SYSTEM_RUNNING)
+		__touch_preempt_timing(__builtin_return_address(0));
+}
+EXPORT_SYMBOL(inc_preempt_count);
+
+void dec_preempt_count(void)
+{
+	if (preempt_count() == 1 && system_state == SYSTEM_RUNNING &&
+					__get_cpu_var(preempt_entry)) {
+		u64 hold;
+		unsigned long preempt_exit
+				= (unsigned long)__builtin_return_address(0);
+		hold = sched_clock() - __get_cpu_var(preempt_timings) + 999999;
+		do_div(hold, 1000000);
+		if (preempt_thresh && hold > preempt_thresh &&
+							printk_ratelimit()) {
+			printk("%lums non-preemptible critical section "
+				"violated %d ms preempt threshold "
+				"starting at ",
+				(unsigned long)hold,
+				preempt_thresh);
+			print_symbol("%s", __get_cpu_var(preempt_entry));
+			printk(" and ending at ");
+			print_symbol("%s", preempt_exit);
+			printk("\n");
+			dump_stack();
+		}
+		__get_cpu_var(preempt_entry) = 0;
+	}
+	preempt_count()--;
+}
+EXPORT_SYMBOL(dec_preempt_count);
+#endif

  reply	other threads:[~2004-07-12 11:32 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-07-12  6:29 [PATCH] Instrumenting high latency Con Kolivas
2004-07-12  6:37 ` Andrew Morton
2004-07-12  6:43   ` Con Kolivas
2004-07-12  6:58     ` Andrew Morton
2004-07-12  7:00   ` [PATCH] " William Lee Irwin III
2004-07-12  7:34 ` Andrew Morton
2004-07-12  8:02   ` William Lee Irwin III
2004-07-12  8:38     ` William Lee Irwin III
2004-07-12 11:31       ` William Lee Irwin III [this message]
2004-07-12 14:32         ` William Lee Irwin III
2004-07-12 17:09         ` Martin Josefsson
2004-07-12  8:29   ` Arjan van de Ven
2004-07-12  8:42     ` William Lee Irwin III
2004-07-12  8:47       ` William Lee Irwin III
2004-07-12 10:41 ` Rajput
2004-07-12 10:44   ` William Lee Irwin III
2004-07-12 14:24 ` Martin J. Bligh
2004-07-12 14:27   ` Con Kolivas
2004-07-12 14:33     ` Martin J. Bligh
2004-07-12 14:42       ` Con Kolivas
2004-07-12 14:47         ` Martin J. Bligh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040712113114.GA21066@holomorphy.com \
    --to=wli@holomorphy.com \
    --cc=akpm@osdl.org \
    --cc=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.