All of lore.kernel.org
 help / color / mirror / Atom feed
From: Theodore Tso <tytso@mit.edu>
To: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: tglx@linutronix.de, torvalds@linux-foundation.org,
	linux-kernel@vger.kernel.org,
	Ondrej Zary <linux@rainbow-software.org>,
	Magnus Damm <magnus.damm@gmail.com>
Subject: Re: T400 suspend/resume regression -- bisected to a mystery merge commit
Date: Thu, 1 Oct 2009 20:59:07 -0400	[thread overview]
Message-ID: <20091002005907.GA7490@mit.edu> (raw)
In-Reply-To: <200909282322.57824.rjw@sisk.pl>

So as I mentioned, using the technique that Linus suggested, I
bisected it down to this commit:

commit 75c5158f70c065b9704b924503d96e8297838f79
Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date:   Fri Aug 14 15:47:30 2009 +0200

    timekeeping: Update clocksource with stop_machine
    
    update_wall_time calls change_clocksource HZ times per second to check
    if a new clock source is available. In close to 100% of all calls
    there is no new clock. Replace the tick based check by an update done
    with stop_machine.
    
    Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
    Cc: Ingo Molnar <mingo@elte.hu>
    Acked-by: John Stultz <johnstul@us.ibm.com>
    Cc: Daniel Walker <dwalker@fifo99.com>
    LKML-Reference: <20090814134810.711836357@de.ibm.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

The commit wasn't easy to undo, given how many other changes there
were, but starting with 2.6.31-git9, which exhibited the problem, I
reverted commit 75c5158, and while I'm not 100% sure I resolved the
conflicts correctly, 2.6.31-git9 will reliable not come back from the
2nd suspend, and after I apply the following patch attempting to
revert 75c5158, the problem goes away.

						- Ted

commit 8c3ee48dabee782d470cc4c7048ea64bb8b7d1cb
Author: Theodore Ts'o <tytso@mit.edu>
Date:   Thu Oct 1 20:39:03 2009 -0400

    Revert "timekeeping: Update clocksource with stop_machine"
    
    This reverts commit 75c5158f70c065b9704b924503d96e8297838f79.
    
    Conflicts:
    
    	kernel/time/clocksource.c

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd..99bb031 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -292,6 +292,4 @@ static inline void update_vsyscall_tz(void)
 }
 #endif
 
-extern void timekeeping_notify(struct clocksource *clock);
-
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0911334..f6b84b2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -110,19 +110,36 @@ EXPORT_SYMBOL(timecounter_cyc2time);
 /*[Clocksource internal variables]---------
  * curr_clocksource:
  *	currently selected clocksource.
+ * next_clocksource:
+ *	pending next selected clocksource.
  * clocksource_list:
  *	linked list with the registered clocksources
- * clocksource_mutex:
- *	protects manipulations to curr_clocksource and the clocksource_list
+ * clocksource_lock:
+ *	protects manipulations to curr_clocksource and next_clocksource
+ *	and the clocksource_list
  * override_name:
  *	Name of the user-specified clocksource.
  */
 static struct clocksource *curr_clocksource;
+static struct clocksource *next_clocksource;
 static LIST_HEAD(clocksource_list);
-static DEFINE_MUTEX(clocksource_mutex);
+static DEFINE_SPINLOCK(clocksource_lock);
 static char override_name[32];
 static int finished_booting;
 
+/* clocksource_done_booting - Called near the end of core bootup
+ *
+ * Hack to avoid lots of clocksource churn at boot time.
+ * We use fs_initcall because we want this to start before
+ * device_initcall but after subsys_initcall.
+ */
+static int __init clocksource_done_booting(void)
+{
+	finished_booting = 1;
+	return 0;
+}
+fs_initcall(clocksource_done_booting);
+
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
 
@@ -353,7 +370,6 @@ static int clocksource_watchdog_kthread(void *data)
 	unsigned long flags;
 	LIST_HEAD(unstable);
 
-	mutex_lock(&clocksource_mutex);
 	spin_lock_irqsave(&watchdog_lock, flags);
 	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -369,7 +385,6 @@ static int clocksource_watchdog_kthread(void *data)
 		list_del_init(&cs->wd_list);
 		__clocksource_change_rating(cs, 0);
 	}
-	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
 
@@ -393,16 +408,18 @@ static inline int clocksource_watchdog_kthread(void *data) { return 0; }
 void clocksource_resume(void)
 {
 	struct clocksource *cs;
+	unsigned long flags;
 
-	mutex_lock(&clocksource_mutex);
+	spin_lock_irqsave(&clocksource_lock, flags);
 
-	list_for_each_entry(cs, &clocksource_list, list)
+	list_for_each_entry(cs, &clocksource_list, list) {
 		if (cs->resume)
 			cs->resume();
+	}
 
 	clocksource_resume_watchdog();
 
-	mutex_unlock(&clocksource_mutex);
+	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 
 /**
@@ -418,11 +435,28 @@ void clocksource_touch_watchdog(void)
 }
 
 #ifdef CONFIG_GENERIC_TIME
+/**
+ * clocksource_get_next - Returns the selected clocksource
+ *
+ */
+struct clocksource *clocksource_get_next(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocksource_lock, flags);
+	if (next_clocksource && finished_booting) {
+		curr_clocksource = next_clocksource;
+		next_clocksource = NULL;
+	}
+	spin_unlock_irqrestore(&clocksource_lock, flags);
+
+	return curr_clocksource;
+}
 
 /**
  * clocksource_select - Select the best clocksource available
  *
- * Private function. Must hold clocksource_mutex when called.
+ * Private function. Must hold clocksource_lock when called.
  *
  * Select the clocksource with the best rating, or the clocksource,
  * which is selected by userspace override.
@@ -431,7 +465,7 @@ static void clocksource_select(void)
 {
 	struct clocksource *best, *cs;
 
-	if (!finished_booting || list_empty(&clocksource_list))
+	if (list_empty(&clocksource_list))
 		return;
 	/* First clocksource on the list has the best rating. */
 	best = list_first_entry(&clocksource_list, struct clocksource, list);
@@ -456,43 +490,17 @@ static void clocksource_select(void)
 			best = cs;
 		break;
 	}
-	if (curr_clocksource != best) {
-		printk(KERN_INFO "Switching to clocksource %s\n", best->name);
-		curr_clocksource = best;
-		timekeeping_notify(curr_clocksource);
-	}
+	if (curr_clocksource != best)
+		next_clocksource = best;
 }
 
 #else /* CONFIG_GENERIC_TIME */
 
-static inline void clocksource_select(void) { }
+static void clocksource_select(void) { }
 
 #endif
 
 /*
- * clocksource_done_booting - Called near the end of core bootup
- *
- * Hack to avoid lots of clocksource churn at boot time.
- * We use fs_initcall because we want this to start before
- * device_initcall but after subsys_initcall.
- */
-static int __init clocksource_done_booting(void)
-{
-	finished_booting = 1;
-
-	/*
-	 * Run the watchdog first to eliminate unstable clock sources
-	 */
-	clocksource_watchdog_kthread(NULL);
-
-	mutex_lock(&clocksource_mutex);
-	clocksource_select();
-	mutex_unlock(&clocksource_mutex);
-	return 0;
-}
-fs_initcall(clocksource_done_booting);
-
-/*
  * Enqueue the clocksource sorted by rating
  */
 static void clocksource_enqueue(struct clocksource *cs)
@@ -515,11 +523,13 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 int clocksource_register(struct clocksource *cs)
 {
-	mutex_lock(&clocksource_mutex);
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocksource_lock, flags);
 	clocksource_enqueue(cs);
 	clocksource_select();
+	spin_unlock_irqrestore(&clocksource_lock, flags);
 	clocksource_enqueue_watchdog(cs);
-	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
 EXPORT_SYMBOL(clocksource_register);
@@ -537,9 +547,14 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
  */
 void clocksource_change_rating(struct clocksource *cs, int rating)
 {
-	mutex_lock(&clocksource_mutex);
-	__clocksource_change_rating(cs, rating);
-	mutex_unlock(&clocksource_mutex);
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocksource_lock, flags);
+	list_del(&cs->list);
+	cs->rating = rating;
+	clocksource_enqueue(cs);
+	clocksource_select();
+	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
 
@@ -548,11 +563,13 @@ EXPORT_SYMBOL(clocksource_change_rating);
  */
 void clocksource_unregister(struct clocksource *cs)
 {
-	mutex_lock(&clocksource_mutex);
+	unsigned long flags;
+
 	clocksource_dequeue_watchdog(cs);
+	spin_lock_irqsave(&clocksource_lock, flags);
 	list_del(&cs->list);
 	clocksource_select();
-	mutex_unlock(&clocksource_mutex);
+	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 EXPORT_SYMBOL(clocksource_unregister);
 
@@ -570,9 +587,9 @@ sysfs_show_current_clocksources(struct sys_device *dev,
 {
 	ssize_t count = 0;
 
-	mutex_lock(&clocksource_mutex);
+	spin_lock_irq(&clocksource_lock);
 	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
-	mutex_unlock(&clocksource_mutex);
+	spin_unlock_irq(&clocksource_lock);
 
 	return count;
 }
@@ -600,14 +617,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 	if (buf[count-1] == '\n')
 		count--;
 
-	mutex_lock(&clocksource_mutex);
+	spin_lock_irq(&clocksource_lock);
 
 	if (count > 0)
 		memcpy(override_name, buf, count);
 	override_name[count] = 0;
 	clocksource_select();
 
-	mutex_unlock(&clocksource_mutex);
+	spin_unlock_irq(&clocksource_lock);
 
 	return ret;
 }
@@ -627,7 +644,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 	struct clocksource *src;
 	ssize_t count = 0;
 
-	mutex_lock(&clocksource_mutex);
+	spin_lock_irq(&clocksource_lock);
 	list_for_each_entry(src, &clocksource_list, list) {
 		/*
 		 * Don't show non-HRES clocksource if the tick code is
@@ -639,7 +656,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
 				  "%s ", src->name);
 	}
-	mutex_unlock(&clocksource_mutex);
+	spin_unlock_irq(&clocksource_lock);
 
 	count += snprintf(buf + count,
 			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
@@ -694,10 +711,11 @@ device_initcall(init_clocksource_sysfs);
  */
 static int __init boot_override_clocksource(char* str)
 {
-	mutex_lock(&clocksource_mutex);
+	unsigned long flags;
+	spin_lock_irqsave(&clocksource_lock, flags);
 	if (str)
 		strlcpy(override_name, str, sizeof(override_name));
-	mutex_unlock(&clocksource_mutex);
+	spin_unlock_irqrestore(&clocksource_lock, flags);
 	return 1;
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fb0f46f..74e2ca2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -18,7 +18,6 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/tick.h>
-#include <linux/stop_machine.h>
 
 /* Structure holding internal timekeeping values. */
 struct timekeeper {
@@ -180,7 +179,6 @@ void timekeeping_leap_insert(int leapsecond)
 }
 
 #ifdef CONFIG_GENERIC_TIME
-
 /**
  * timekeeping_forward_now - update clock to the current time
  *
@@ -353,40 +351,31 @@ EXPORT_SYMBOL(do_settimeofday);
  *
  * Accumulates current time interval and initializes new clocksource
  */
-static int change_clocksource(void *data)
+static void change_clocksource(void)
 {
 	struct clocksource *new, *old;
 
-	new = (struct clocksource *) data;
+	new = clocksource_get_next();
+
+	if (!new || timekeeper.clock == new)
+		return;
 
 	timekeeping_forward_now();
-	if (!new->enable || new->enable(new) == 0) {
-		old = timekeeper.clock;
-		timekeeper_setup_internals(new);
-		if (old->disable)
-			old->disable(old);
-	}
-	return 0;
-}
 
-/**
- * timekeeping_notify - Install a new clock source
- * @clock:		pointer to the clock source
- *
- * This function is called from clocksource.c after a new, better clock
- * source has been registered. The caller holds the clocksource_mutex.
- */
-void timekeeping_notify(struct clocksource *clock)
-{
-	if (timekeeper.clock == clock)
+	if (new->enable && !new->enable(new))
 		return;
-	stop_machine(change_clocksource, clock, NULL);
+
+	old = timekeeper.clock;
+	timekeeper_setup_internals(new);
+
+	if (old->disable)
+		old->disable(old);
+
 	tick_clock_notify();
 }
-
 #else /* GENERIC_TIME */
-
 static inline void timekeeping_forward_now(void) { }
+static inline void change_clocksource(void) { }
 
 /**
  * ktime_get - get the monotonic time in ktime_t format
@@ -427,7 +416,6 @@ void ktime_get_ts(struct timespec *ts)
 				ts->tv_nsec + tomono.tv_nsec);
 }
 EXPORT_SYMBOL_GPL(ktime_get_ts);
-
 #endif /* !GENERIC_TIME */
 
 /**
@@ -810,6 +798,7 @@ void update_wall_time(void)
 	update_xtime_cache(nsecs);
 
 	/* check to see if there is a new clocksource to use */
+	change_clocksource();
 	update_vsyscall(&xtime, timekeeper.clock);
 }
 

  reply	other threads:[~2009-10-02  0:59 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-26  6:57 T400 suspend/resume regression -- bisected to a mystery merge commit Theodore Ts'o
2009-09-26 16:28 ` Linus Torvalds
2009-09-27  8:08 ` Len Brown
2009-09-27 16:13 ` Rafael J. Wysocki
2009-09-28 13:51   ` Theodore Tso
2009-09-28 21:22     ` Rafael J. Wysocki
2009-10-02  0:59       ` Theodore Tso [this message]
2009-10-02  1:21         ` Linus Torvalds
2009-10-02  8:02           ` Martin Schwidefsky
2009-10-02 22:34           ` Theodore Tso
2009-10-02  6:50     ` Magnus Damm
2009-10-02 17:51       ` Rafael J. Wysocki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091002005907.GA7490@mit.edu \
    --to=tytso@mit.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@rainbow-software.org \
    --cc=magnus.damm@gmail.com \
    --cc=rjw@sisk.pl \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.