public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 05/10] -mm  clocksource: convert generic timeofday
  2006-08-04  3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
@ 2006-08-04  3:24 ` dwalker
  0 siblings, 0 replies; 31+ messages in thread
From: dwalker @ 2006-08-04  3:24 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_more_generic.patch --]
[-- Type: text/plain, Size: 15163 bytes --]

This patch shifts some of the code around so that the time 
of day override happens inside kernel/timer.c.

The biggest timeofday changes are in update_wall_time() and
change_clocksource(). I removed the unconditional call to 
change_clocksource(), and replaced it with a single atomic
check. The atomic is asserted only when a clock change is
needed. update_callback is no longer driven from 
update_wall_time().

The fast path is now a single atomic check.


Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 include/linux/clocksource.h |    2 
 kernel/time/clocksource.c   |  183 +++++---------------------------------------
 kernel/timer.c              |  162 +++++++++++++++++++++++++++++++++-----
 3 files changed, 164 insertions(+), 183 deletions(-)

Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -206,6 +206,8 @@ static inline void clocksource_calculate
 
 /* used to install a new clocksource */
 extern int clocksource_register(struct clocksource*);
+extern int clocksource_sysfs_register(struct sysdev_attribute*);
+extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
 extern void clocksource_rating_change(struct clocksource*);
 extern struct clocksource * clocksource_get_clock(char*);
 
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -5,6 +5,8 @@
  *
  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
  *
+ * Copyright (C) 2006 MontaVista Daniel Walker (dwalker@mvista.com)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -21,7 +23,6 @@
  *
  * TODO WishList:
  *   o Allow clocksource drivers to be unregistered
- *   o get rid of clocksource_jiffies extern
  */
 
 #include <linux/clocksource.h>
@@ -29,51 +30,20 @@
 #include <linux/init.h>
 #include <linux/module.h>
 
-/* XXX - Would like a better way for initializing curr_clocksource */
-extern struct clocksource clocksource_jiffies;
-
 /*
  * Internally used to invert the rating, so lower is better.
  */
 #define CLOCKSOURCE_RATING(x)	(INT_MAX-x)
 
 /*[Clocksource internal variables]---------
- * curr_clocksource:
- *	currently selected clocksource. Initialized to clocksource_jiffies.
- * next_clocksource:
- *	pending next selected clocksource.
  * clocksource_list:
  *	priority list with the registered clocksources
  * clocksource_lock:
- *	protects manipulations to curr_clocksource and next_clocksource
- *	and the clocksource_list
- * override_name:
- *	Name of the user-specified clocksource.
+ * 	protects manipulations to the clocksource_list
  */
-static struct clocksource *curr_clocksource = &clocksource_jiffies;
-static struct clocksource *next_clocksource;
 static struct plist_head clocksource_list =
 	PLIST_HEAD_INIT(clocksource_list, clocksource_lock);
 static DEFINE_SPINLOCK(clocksource_lock);
-static char override_name[32];
-
-/**
- * clocksource_get_next - Returns the selected clocksource
- *
- */
-struct clocksource *clocksource_get_next(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&clocksource_lock, flags);
-	if (next_clocksource) {
-		curr_clocksource = next_clocksource;
-		next_clocksource = NULL;
-	}
-	spin_unlock_irqrestore(&clocksource_lock, flags);
-
-	return curr_clocksource;
-}
 
 /**
  * __is_registered - Returns a clocksource if it's registered
@@ -139,24 +109,6 @@ struct clocksource * clocksource_get_clo
 	return ret;
 }
 
-
-/**
- * select_clocksource - Finds the best registered clocksource.
- *
- * Private function. Must hold clocksource_lock when called.
- *
- * Looks through the list of registered clocksources, returning
- * the one with the highest rating value. If there is a clocksource
- * name that matches the override string, it returns that clocksource.
- */
-static struct clocksource *select_clocksource(void)
-{
-	if (!*override_name)
-		return plist_first_entry(&clocksource_list, struct clocksource,
-					 list);
-	return get_clock(override_name);
-}
-
 /**
  * clocksource_register - Used to install new clocksources
  * @t:		clocksource to be registered
@@ -181,8 +133,6 @@ int clocksource_register(struct clocksou
 	} else {
 		plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
  		plist_add(&c->list, &clocksource_list);
-		/* scan the registered clocksources, and pick the best one */
-		next_clocksource = select_clocksource();
 	}
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 	return ret;
@@ -212,70 +162,12 @@ void clocksource_rating_change(struct cl
 	plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
 	plist_add(&c->list, &clocksource_list);
 
-	next_clocksource = select_clocksource();
+	/* XXX: Add block notifier to signal new rating */
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 EXPORT_SYMBOL(clocksource_rating_change);
 
 /**
- * sysfs_show_current_clocksources - sysfs interface for current clocksource
- * @dev:	unused
- * @buf:	char buffer to be filled with clocksource list
- *
- * Provides sysfs interface for listing current clocksource.
- */
-static ssize_t
-sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
-{
-	char *curr = buf;
-
-	spin_lock_irq(&clocksource_lock);
-	curr += sprintf(curr, "%s ", curr_clocksource->name);
-	spin_unlock_irq(&clocksource_lock);
-
-	curr += sprintf(curr, "\n");
-
-	return curr - buf;
-}
-
-/**
- * sysfs_override_clocksource - interface for manually overriding clocksource
- * @dev:	unused
- * @buf:	name of override clocksource
- * @count:	length of buffer
- *
- * Takes input from sysfs interface for manually overriding the default
- * clocksource selction.
- */
-static ssize_t sysfs_override_clocksource(struct sys_device *dev,
-					  const char *buf, size_t count)
-{
-	size_t ret = count;
-	/* strings from sysfs write are not 0 terminated! */
-	if (count >= sizeof(override_name))
-		return -EINVAL;
-
-	/* strip of \n: */
-	if (buf[count-1] == '\n')
-		count--;
-	if (count < 1)
-		return -EINVAL;
-
-	spin_lock_irq(&clocksource_lock);
-
-	/* copy the name given: */
-	memcpy(override_name, buf, count);
-	override_name[count] = 0;
-
-	/* try to select it: */
-	next_clocksource = select_clocksource();
-
-	spin_unlock_irq(&clocksource_lock);
-
-	return ret;
-}
-
-/**
  * sysfs_show_available_clocksources - sysfs interface for listing clocksource
  * @dev:	unused
  * @buf:	char buffer to be filled with clocksource list
@@ -304,11 +196,8 @@ sysfs_show_available_clocksources(struct
 }
 
 /*
- * Sysfs setup bits:
+ * Generic sysfs setup bits:
  */
-static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
-		   sysfs_override_clocksource);
-
 static SYSDEV_ATTR(available_clocksource, 0600,
 		   sysfs_show_available_clocksources, NULL);
 
@@ -321,6 +210,21 @@ static struct sys_device device_clocksou
 	.cls	= &clocksource_sysclass,
 };
 
+/**
+ * clocksource_sysfs_register - interface to register a sysfs
+ *				hook under the clocksource sys_device.
+ * @attr:	sysdev_attribute created with the SYSDEV_ATTR macro.
+ *
+ * This functions should be used to create a sysfs file under
+ * the clocksource directory which will be used to show the current
+ * clock used by the code calling clocksource_sysfs_register(), and
+ * set a specific overide when written to.
+ */
+int clocksource_sysfs_register(struct sysdev_attribute * attr)
+{
+	return sysdev_create_file(&device_clocksource, attr);
+}
+
 static int __init init_clocksource_sysfs(void)
 {
 	int error = sysdev_class_register(&clocksource_sysclass);
@@ -330,52 +234,7 @@ static int __init init_clocksource_sysfs
 	if (!error)
 		error = sysdev_create_file(
 				&device_clocksource,
-				&attr_current_clocksource);
-	if (!error)
-		error = sysdev_create_file(
-				&device_clocksource,
 				&attr_available_clocksource);
 	return error;
 }
-
-device_initcall(init_clocksource_sysfs);
-
-/**
- * boot_override_clocksource - boot clock override
- * @str:	override name
- *
- * Takes a clocksource= boot argument and uses it
- * as the clocksource override name.
- */
-static int __init boot_override_clocksource(char* str)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&clocksource_lock, flags);
-	if (str)
-		strlcpy(override_name, str, sizeof(override_name));
-	spin_unlock_irqrestore(&clocksource_lock, flags);
-	return 1;
-}
-
-__setup("clocksource=", boot_override_clocksource);
-
-/**
- * boot_override_clock - Compatibility layer for deprecated boot option
- * @str:	override name
- *
- * DEPRECATED! Takes a clock= boot argument and uses it
- * as the clocksource override name
- */
-static int __init boot_override_clock(char* str)
-{
-	if (!strcmp(str, "pmtmr")) {
-		printk("Warning: clock=pmtmr is deprecated. "
-			"Use clocksource=acpi_pm.\n");
-		return boot_override_clocksource("acpi_pm");
-	}
-	printk("Warning! clock= boot option is deprecated. "
-		"Use clocksource=xyz\n");
-	return boot_override_clocksource(str);
-}
-
-__setup("clock=", boot_override_clock);
+postcore_initcall(init_clocksource_sysfs);
Index: linux-2.6.17/kernel/timer.c
===================================================================
--- linux-2.6.17.orig/kernel/timer.c
+++ linux-2.6.17/kernel/timer.c
@@ -17,6 +17,8 @@
  *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
  *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
  *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
+ *  2006-08-03  Added usage of the generic clocksource API
+ *				Copyright (C) 2006 MontaVista, Daniel Walker
  */
 
 #include <linux/kernel_stat.h>
@@ -788,9 +790,15 @@ u64 current_tick_length(void)
 
 /* XXX - all of this timekeeping code should be later moved to time.c */
 #include <linux/clocksource.h>
-static struct clocksource *clock; /* pointer to current clocksource */
+/* pointer to current clocksource */
+static struct clocksource *clock = &clocksource_jiffies;
+static char clock_override_name[32];
+
+/* Interrupt update singaling variables */
+static atomic_t clock_check = ATOMIC_INIT(0);
 
 #ifdef CONFIG_GENERIC_TIME
+
 /**
  * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
  *
@@ -910,29 +918,120 @@ EXPORT_SYMBOL(do_settimeofday);
  *
  * Accumulates current time interval and initializes new clocksource
  */
-static int change_clocksource(void)
+static int change_clocksource(char * override)
 {
-	struct clocksource *new;
-	cycle_t now;
 	u64 nsec;
-	new = clocksource_get_next();
-	if (clock != new) {
-		now = clocksource_read(new);
-		nsec =  __get_nsec_offset();
-		timespec_add_ns(&xtime, nsec);
-
-		clock = new;
-		clock->cycle_last = now;
-		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-		       clock->name);
-		return 1;
-	} else if (clock->update_callback) {
-		return clock->update_callback();
+	cycle_t now;
+	struct clocksource *new = clocksource_get_clock(override);
+
+	now = clocksource_read(new);
+	nsec =  __get_nsec_offset();
+	timespec_add_ns(&xtime, nsec);
+
+	clock = new;
+	clock->cycle_last = now;
+	printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+	       clock->name);
+
+	return 1;
+}
+
+/**
+ * sysfs_show_current_clocksources - sysfs interface for current clocksource
+ * @dev:	unused
+ * @buf:	char buffer to be filled with clocksource list
+ *
+ * Provides sysfs interface for listing the current clocksource.
+ * Locking handled inside sysfs.
+ */
+static ssize_t
+sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
+{
+	return sprintf(buf, "%s \n", clock->name);
+}
+
+/**
+ * sysfs_override_clocksource - interface for manually overriding clocksource
+ * @dev:	unused
+ * @buf:	name of override clocksource
+ * @count:	length of buffer
+ *
+ * Takes input from sysfs interface for manually overriding the default
+ * clocksource selction. Locking handled inside sysfs
+ */
+static ssize_t sysfs_override_clocksource(struct sys_device *dev,
+					  const char *buf, size_t count)
+{
+	size_t ret = count;
+
+	/*
+	 * If there's already an update in progress then
+	 * we can't proceed.
+	 */
+	if (atomic_read(&clock_check))
+		return -EINVAL;
+
+	/* strings from sysfs write are not 0 terminated! */
+	if (count >= sizeof(clock_override_name))
+		return -EINVAL;
+
+	/* strip of \n: */
+	if (buf[count-1] == '\n')
+		count--;
+	if (count < 1)
+		return -EINVAL;
+
+	/* copy the name given: */
+	memcpy(clock_override_name, buf, count);
+	clock_override_name[count] = 0;
+
+	if (!clocksource_get_clock(clock_override_name)) {
+		clock_override_name[0] = 0;
+		return  -EINVAL;
 	}
-	return 0;
+
+	atomic_inc(&clock_check);
+
+	return ret;
 }
+
+/*
+ * Sysfs atrribure structure.
+ */
+static SYSDEV_ATTR(timeofday_clocksource, 0600, sysfs_show_current_clocksources,
+		   sysfs_override_clocksource);
+
+/**
+ * boot_override_clocksource - boot clock override
+ * @str:	override name
+ *
+ * Takes a clocksource= boot argument and uses it
+ * as the clocksource override name.
+ */
+static int __init boot_override_clocksource(char* str)
+{
+	if (str) {
+		/*
+		 * Make sure the clock exists.
+		 */
+		if (clocksource_get_clock(str))
+			strlcpy(clock_override_name, str,
+				sizeof(clock_override_name));
+		else {
+			printk("Time: requested clock \"%s\" doesn't exist\n",
+			       str);
+			return 0;
+		}
+	}
+	/* Signal the interrupt to update. */
+	atomic_inc(&clock_check);
+
+	return 1;
+}
+__setup("timeofday_clocksource=", boot_override_clocksource);
+
 #else
-#define change_clocksource()	do { 0; } while(0)
+#define change_clocksource(x)	do { 0; } while(0)
 #endif
 
 /**
@@ -961,7 +1060,7 @@ void __init timekeeping_init(void)
 	unsigned long flags;
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	clock = clocksource_get_next();
+	clock = clocksource_get_best_clock();
 	clocksource_calculate_interval(clock, tick_nsec);
 	clock->cycle_last = clocksource_read(clock);
 	ntp_clear();
@@ -1018,6 +1117,11 @@ static int __init timekeeping_init_devic
 	int error = sysdev_class_register(&timekeeping_sysclass);
 	if (!error)
 		error = sysdev_register(&device_timer);
+
+#ifdef CONFIG_GENERIC_TIME
+	atomic_inc(&clock_check);
+	clocksource_sysfs_register(&attr_timeofday_clocksource);
+#endif
 	return error;
 }
 
@@ -1161,10 +1265,26 @@ static void update_wall_time(void)
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
-	if (change_clocksource()) {
+	if (unlikely(atomic_read(&clock_check))) {
+
+		/*
+		 * Switch to the new override clock, or the highest
+		 * rated clock.
+		 */
+		if (*clock_override_name)
+			change_clocksource(clock_override_name);
+		else
+			change_clocksource(NULL);
+
 		clock->error = 0;
 		clock->xtime_nsec = 0;
 		clocksource_calculate_interval(clock, tick_nsec);
+
+		/*
+		 * Remove the change signal
+		 */
+		atomic_dec(&clock_check);
+
 	}
 }
 

--

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 00/10] -mm: generic clocksource API -v2
@ 2006-10-06 18:54 Daniel Walker
  2006-10-06 18:54 ` [PATCH 01/10] -mm: clocksource: increase initcall priority Daniel Walker
                   ` (10 more replies)
  0 siblings, 11 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

Each patch has it's own change log in the patch header, and each 
patch compiles and boots if applied individually (but in series order). 

---- Original release notes below

This patch set is meant to modify the clocksource structure and API so that it
can be used by more than just the timekeeping code. My motivation is mainly
that I feel the current clocksource interface could be used for much more
than just timekeeping. So if we keep the clocksource interface (which I think
we should) then we should get everything out of it that we can.

This set modifies the API, then converts the time keeping code over to the new
API. I also added a generic sched_clock() which just re-uses the clocksource
interface to provide a high quality scheduling clock (assuming a good
clocksource). Several ARM board just output nanoseconds based on jiffies which
is still possible with the generic sched_clock().

I tested this on SMP and UP x86, and compile tested for ARM.

Daniel

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-07 15:40   ` OGAWA Hirofumi
  2006-10-09 18:50   ` john stultz
  2006-10-06 18:54 ` [PATCH 02/10] -mm: clocksource: small cleanup Daniel Walker
                   ` (9 subsequent siblings)
  10 siblings, 2 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_init_call.patch --]
[-- Type: text/plain, Size: 4627 bytes --]

Since it's likely that this interface would get used during bootup 
I moved all the clocksource registration into the postcore initcall. 
This also eliminated some clocksource shuffling during bootup.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 arch/i386/kernel/hpet.c          |    2 +-
 arch/i386/kernel/i8253.c         |    2 +-
 arch/i386/kernel/tsc.c           |    2 +-
 drivers/clocksource/acpi_pm.c    |    2 +-
 drivers/clocksource/cyclone.c    |    2 +-
 drivers/clocksource/scx200_hrt.c |    2 +-
 kernel/time/clocksource.c        |   15 +--------------
 kernel/time/jiffies.c            |    2 +-
 8 files changed, 8 insertions(+), 21 deletions(-)

Index: linux-2.6.17/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.17/arch/i386/kernel/hpet.c
@@ -64,4 +64,4 @@ static int __init init_hpet_clocksource(
 	return clocksource_register(&clocksource_hpet);
 }
 
-module_init(init_hpet_clocksource);
+postcore_initcall(init_hpet_clocksource);
Index: linux-2.6.17/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/i8253.c
+++ linux-2.6.17/arch/i386/kernel/i8253.c
@@ -115,4 +115,4 @@ static int __init init_pit_clocksource(v
 	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
 	return clocksource_register(&clocksource_pit);
 }
-module_init(init_pit_clocksource);
+postcore_initcall(init_pit_clocksource);
Index: linux-2.6.17/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.17/arch/i386/kernel/tsc.c
@@ -475,4 +475,4 @@ static int __init init_tsc_clocksource(v
 	return 0;
 }
 
-module_init(init_tsc_clocksource);
+postcore_initcall(init_tsc_clocksource);
Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
+++ linux-2.6.17/drivers/clocksource/acpi_pm.c
@@ -174,4 +174,4 @@ pm_good:
 	return clocksource_register(&clocksource_acpi_pm);
 }
 
-module_init(init_acpi_pm_clocksource);
+postcore_initcall(init_acpi_pm_clocksource);
Index: linux-2.6.17/drivers/clocksource/cyclone.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/cyclone.c
+++ linux-2.6.17/drivers/clocksource/cyclone.c
@@ -116,4 +116,4 @@ static int __init init_cyclone_clocksour
 	return clocksource_register(&clocksource_cyclone);
 }
 
-module_init(init_cyclone_clocksource);
+postcore_initcall(init_cyclone_clocksource);
Index: linux-2.6.17/drivers/clocksource/scx200_hrt.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/scx200_hrt.c
+++ linux-2.6.17/drivers/clocksource/scx200_hrt.c
@@ -94,7 +94,7 @@ static int __init init_hrt_clocksource(v
 	return clocksource_register(&cs_hrt);
 }
 
-module_init(init_hrt_clocksource);
+postcore_initcall(init_hrt_clocksource);
 
 MODULE_AUTHOR("Jim Cromie <jim.cromie@gmail.com>");
 MODULE_DESCRIPTION("clocksource on SCx200 HiRes Timer");
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -50,19 +50,6 @@ static struct clocksource *next_clocksou
 static LIST_HEAD(clocksource_list);
 static DEFINE_SPINLOCK(clocksource_lock);
 static char override_name[32];
-static int finished_booting;
-
-/* clocksource_done_booting - Called near the end of bootup
- *
- * Hack to avoid lots of clocksource churn at boot time
- */
-static int __init clocksource_done_booting(void)
-{
-	finished_booting = 1;
-	return 0;
-}
-
-late_initcall(clocksource_done_booting);
 
 /**
  * clocksource_get_next - Returns the selected clocksource
@@ -73,7 +60,7 @@ struct clocksource *clocksource_get_next
 	unsigned long flags;
 
 	spin_lock_irqsave(&clocksource_lock, flags);
-	if (next_clocksource && finished_booting) {
+	if (next_clocksource) {
 		curr_clocksource = next_clocksource;
 		next_clocksource = NULL;
 	}
Index: linux-2.6.17/kernel/time/jiffies.c
===================================================================
--- linux-2.6.17.orig/kernel/time/jiffies.c
+++ linux-2.6.17/kernel/time/jiffies.c
@@ -70,4 +70,4 @@ static int __init init_jiffies_clocksour
 	return clocksource_register(&clocksource_jiffies);
 }
 
-module_init(init_jiffies_clocksource);
+postcore_initcall(init_jiffies_clocksource);

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 02/10] -mm: clocksource: small cleanup
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
  2006-10-06 18:54 ` [PATCH 01/10] -mm: clocksource: increase initcall priority Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-09 18:51   ` john stultz
  2006-10-06 18:54 ` [PATCH 03/10] -mm: clocksource: move old API calls Daniel Walker
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_cleanup.patch --]
[-- Type: text/plain, Size: 2874 bytes --]

Mostly changing alignment. Just some general cleanup.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>
Acked-by: John Stultz <johnstul@us.ibm.com>

---
 include/linux/clocksource.h |    2 +-
 kernel/time/clocksource.c   |    6 +++---
 kernel/timer.c              |    7 ++++---
 3 files changed, 8 insertions(+), 7 deletions(-)

Index: linux-2.6.18/include/linux/clocksource.h
===================================================================
--- linux-2.6.18.orig/include/linux/clocksource.h
+++ linux-2.6.18/include/linux/clocksource.h
@@ -159,7 +159,7 @@ static inline s64 cyc2ns(struct clocksou
  * Unless you're the timekeeping code, you should not be using this!
  */
 static inline void clocksource_calculate_interval(struct clocksource *c,
-						unsigned long length_nsec)
+					  	  unsigned long length_nsec)
 {
 	u64 tmp;
 
Index: linux-2.6.18/kernel/time/clocksource.c
===================================================================
--- linux-2.6.18.orig/kernel/time/clocksource.c
+++ linux-2.6.18/kernel/time/clocksource.c
@@ -143,7 +143,7 @@ int clocksource_register(struct clocksou
 	/* check if clocksource is already registered */
 	if (is_registered_source(c)) {
 		printk("register_clocksource: Cannot register %s. "
-			"Already registered!", c->name);
+		       "Already registered!", c->name);
 		ret = -EBUSY;
 	} else {
 		/* register it */
@@ -262,10 +262,10 @@ sysfs_show_available_clocksources(struct
  * Sysfs setup bits:
  */
 static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
-			sysfs_override_clocksource);
+		   sysfs_override_clocksource);
 
 static SYSDEV_ATTR(available_clocksource, 0600,
-			sysfs_show_available_clocksources, NULL);
+		   sysfs_show_available_clocksources, NULL);
 
 static struct sysdev_class clocksource_sysclass = {
 	set_kset_name("clocksource"),
Index: linux-2.6.18/kernel/timer.c
===================================================================
--- linux-2.6.18.orig/kernel/timer.c
+++ linux-2.6.18/kernel/timer.c
@@ -714,7 +714,7 @@ static int change_clocksource(void)
 		clock = new;
 		clock->cycle_last = now;
 		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-					clock->name);
+		       clock->name);
 		return 1;
 	} else if (clock->update_callback) {
 		return clock->update_callback();
@@ -722,7 +722,7 @@ static int change_clocksource(void)
 	return 0;
 }
 #else
-#define change_clocksource() (0)
+#define change_clocksource()	do { 0; } while(0)
 #endif
 
 /**
@@ -940,7 +940,8 @@ static void update_wall_time(void)
 
 		/* accumulate error between NTP and clock interval */
 		clock->error += current_tick_length();
-		clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
+		clock->error -= clock->xtime_interval <<
+				(TICK_LENGTH_SHIFT - clock->shift);
 	}
 
 	/* correct the clock when NTP error is too big */

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 03/10] -mm: clocksource: move old API calls
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
  2006-10-06 18:54 ` [PATCH 01/10] -mm: clocksource: increase initcall priority Daniel Walker
  2006-10-06 18:54 ` [PATCH 02/10] -mm: clocksource: small cleanup Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-06 18:54 ` [PATCH 04/10] -mm: clocksource: add some new " Daniel Walker
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_move_old_api.patch --]
[-- Type: text/plain, Size: 4207 bytes --]

I added this to make the next few patches cleaner. The mixing of code removal
and code addition can make a patchset harder to read. 

There's nothing new here from the first patchset, it just moves some code out
of the way.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 kernel/time/clocksource.c |  120 +++++++++++++++++++++++-----------------------
 1 files changed, 61 insertions(+), 59 deletions(-)

Index: linux-2.6.18/kernel/time/clocksource.c
===================================================================
--- linux-2.6.18.orig/kernel/time/clocksource.c
+++ linux-2.6.18/kernel/time/clocksource.c
@@ -31,6 +31,8 @@
 
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
+static int is_registered_source(struct clocksource *c);
+static struct clocksource *select_clocksource(void);
 
 /*[Clocksource internal variables]---------
  * curr_clocksource:
@@ -70,65 +72,6 @@ struct clocksource *clocksource_get_next
 }
 
 /**
- * select_clocksource - Finds the best registered clocksource.
- *
- * Private function. Must hold clocksource_lock when called.
- *
- * Looks through the list of registered clocksources, returning
- * the one with the highest rating value. If there is a clocksource
- * name that matches the override string, it returns that clocksource.
- */
-static struct clocksource *select_clocksource(void)
-{
-	struct clocksource *best = NULL;
-	struct list_head *tmp;
-
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *src;
-
-		src = list_entry(tmp, struct clocksource, list);
-		if (!best)
-			best = src;
-
-		/* check for override: */
-		if (strlen(src->name) == strlen(override_name) &&
-		    !strcmp(src->name, override_name)) {
-			best = src;
-			break;
-		}
-		/* pick the highest rating: */
-		if (src->rating > best->rating)
-		 	best = src;
-	}
-
-	return best;
-}
-
-/**
- * is_registered_source - Checks if clocksource is registered
- * @c:		pointer to a clocksource
- *
- * Private helper function. Must hold clocksource_lock when called.
- *
- * Returns one if the clocksource is already registered, zero otherwise.
- */
-static int is_registered_source(struct clocksource *c)
-{
-	int len = strlen(c->name);
-	struct list_head *tmp;
-
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *src;
-
-		src = list_entry(tmp, struct clocksource, list);
-		if (strlen(src->name) == len &&	!strcmp(src->name, c->name))
-			return 1;
-	}
-
-	return 0;
-}
-
-/**
  * clocksource_register - Used to install new clocksources
  * @t:		clocksource to be registered
  *
@@ -334,3 +277,62 @@ static int __init boot_override_clock(ch
 }
 
 __setup("clock=", boot_override_clock);
+
+/**
+ * select_clocksource - Finds the best registered clocksource.
+ *
+ * Private function. Must hold clocksource_lock when called.
+ *
+ * Looks through the list of registered clocksources, returning
+ * the one with the highest rating value. If there is a clocksource
+ * name that matches the override string, it returns that clocksource.
+ */
+static struct clocksource *select_clocksource(void)
+{
+	struct clocksource *best = NULL;
+	struct list_head *tmp;
+
+	list_for_each(tmp, &clocksource_list) {
+		struct clocksource *src;
+
+		src = list_entry(tmp, struct clocksource, list);
+		if (!best)
+			best = src;
+
+		/* check for override: */
+		if (strlen(src->name) == strlen(override_name) &&
+		    !strcmp(src->name, override_name)) {
+			best = src;
+			break;
+		}
+		/* pick the highest rating: */
+		if (src->rating > best->rating)
+		 	best = src;
+	}
+
+	return best;
+}
+
+/**
+ * is_registered_source - Checks if clocksource is registered
+ * @c:		pointer to a clocksource
+ *
+ * Private helper function. Must hold clocksource_lock when called.
+ *
+ * Returns one if the clocksource is already registered, zero otherwise.
+ */
+static int is_registered_source(struct clocksource *c)
+{
+	int len = strlen(c->name);
+	struct list_head *tmp;
+
+	list_for_each(tmp, &clocksource_list) {
+		struct clocksource *src;
+
+		src = list_entry(tmp, struct clocksource, list);
+		if (strlen(src->name) == len &&	!strcmp(src->name, c->name))
+			return 1;
+	}
+
+	return 0;
+}

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 04/10] -mm: clocksource: add some new API calls
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (2 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 03/10] -mm: clocksource: move old API calls Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-09 19:01   ` john stultz
  2006-10-06 18:54 ` [PATCH 05/10] -mm: clocksource: convert generic timeofday Daniel Walker
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_user_api.patch --]
[-- Type: text/plain, Size: 11700 bytes --]


originally used plist but removed it in this patch, and used a sorted list 
which is just as fast with a lot less memory overhead.

Introduces some new API calls,

- clocksource_get_clock()
	Allows a clock lookup by name.
- clocksource_rating_change()
	Used by a clocksource to signal a rating change. Replaces 
	reselect_clocksource()

I also moved the the clock source list to a plist, which removes some lookup
overhead in the average case.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 arch/i386/kernel/tsc.c      |    2 
 include/linux/clocksource.h |   44 ++++++++-
 kernel/time/clocksource.c   |  213 ++++++++++++++++++++++++++++----------------
 3 files changed, 179 insertions(+), 80 deletions(-)

Index: linux-2.6.18/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.18/arch/i386/kernel/tsc.c
@@ -351,7 +351,7 @@ static int tsc_update_callback(void)
 	/* check to see if we should switch to the safe clocksource: */
 	if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
 		clocksource_tsc.rating = 50;
-		clocksource_reselect();
+		clocksource_rating_change(&clocksource_tsc);
 		change = 1;
 	}
 
Index: linux-2.6.18/include/linux/clocksource.h
===================================================================
--- linux-2.6.18.orig/include/linux/clocksource.h
+++ linux-2.6.18/include/linux/clocksource.h
@@ -12,12 +12,20 @@
 #include <linux/timex.h>
 #include <linux/time.h>
 #include <linux/list.h>
+#include <linux/plist.h>
+#include <linux/sysdev.h>
 #include <asm/div64.h>
 #include <asm/io.h>
 
 /* clocksource cycle base type */
 typedef u64 cycle_t;
 
+/*
+ * This is the only generic clock, it should be used
+ * for early initialization.
+ */
+extern struct clocksource clocksource_jiffies;
+
 /**
  * struct clocksource - hardware abstraction for a free running counter
  *	Provides mostly state-free accessors to the underlying hardware.
@@ -148,6 +156,25 @@ static inline s64 cyc2ns(struct clocksou
 }
 
 /**
+ * ns2cyc - converts nanoseconds to clocksource cycles
+ * @cs:		Pointer to clocksource
+ * @ns:		Nanoseconds
+ *
+ * Uses the clocksource to convert nanoseconds back to cycles.
+ *
+ * XXX - This could use some mult_lxl_ll() asm optimization
+ */
+static inline cycle_t ns2cyc(struct clocksource *cs, s64 ns)
+{
+	u64 ret = ns;
+
+	ret <<= cs->shift;
+	do_div(ret, cs->mult);
+
+	return ret;
+}
+
+/**
  * clocksource_calculate_interval - Calculates a clocksource interval struct
  *
  * @c:		Pointer to clocksource.
@@ -178,8 +205,19 @@ static inline void clocksource_calculate
 
 
 /* used to install a new clocksource */
-int clocksource_register(struct clocksource*);
-void clocksource_reselect(void);
-struct clocksource* clocksource_get_next(void);
+extern struct clocksource *clocksource_get_next(void);
+extern int clocksource_register(struct clocksource*);
+extern void clocksource_rating_change(struct clocksource*);
+extern struct clocksource * clocksource_get_clock(char*);
 
+/**
+ * clocksource_get_best_clock - Finds highest rated clocksource
+ *
+ * Returns the highest rated clocksource. If none are register the
+ * jiffies clock is returned.
+ */
+static inline struct clocksource * clocksource_get_best_clock(void)
+{
+	return clocksource_get_clock(NULL);
+}
 #endif /* _LINUX_CLOCKSOURCE_H */
Index: linux-2.6.18/kernel/time/clocksource.c
===================================================================
--- linux-2.6.18.orig/kernel/time/clocksource.c
+++ linux-2.6.18/kernel/time/clocksource.c
@@ -31,8 +31,6 @@
 
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
-static int is_registered_source(struct clocksource *c);
-static struct clocksource *select_clocksource(void);
 
 /*[Clocksource internal variables]---------
  * curr_clocksource:
@@ -40,7 +38,7 @@ static struct clocksource *select_clocks
  * next_clocksource:
  *	pending next selected clocksource.
  * clocksource_list:
- *	linked list with the registered clocksources
+ *	priority list with the registered clocksources
  * clocksource_lock:
  *	protects manipulations to curr_clocksource and next_clocksource
  *	and the clocksource_list
@@ -49,7 +47,7 @@ static struct clocksource *select_clocks
  */
 static struct clocksource *curr_clocksource = &clocksource_jiffies;
 static struct clocksource *next_clocksource;
-static LIST_HEAD(clocksource_list);
+static struct list_head clocksource_list = LIST_HEAD_INIT(clocksource_list);
 static DEFINE_SPINLOCK(clocksource_lock);
 static char override_name[32];
 
@@ -72,10 +70,122 @@ struct clocksource *clocksource_get_next
 }
 
 /**
+ * __is_registered - Returns a clocksource if it's registered
+ * @name:		name of the clocksource to return
+ *
+ * Private function. Must hold clocksource_lock when called.
+ *
+ * Returns the clocksource if registered, zero otherwise.
+ * If no clocksources are registered the jiffies clock is
+ * returned.
+ */
+static struct clocksource * __is_registered(char * name)
+{
+	struct list_head *tmp;
+
+	list_for_each(tmp, &clocksource_list) {
+		struct clocksource *src;
+
+		src = list_entry(tmp, struct clocksource, list);
+		if (!strcmp(src->name, name))
+			return src;
+	}
+
+	return 0;
+}
+
+/**
+ * __get_clock - Finds a specific clocksource
+ * @name:		name of the clocksource to return
+ *
+ * Private function. Must hold clocksource_lock when called.
+ *
+ * Returns the clocksource if registered, zero otherwise.
+ * If the @name is null the highest rated clock is returned.
+ */
+static inline struct clocksource * __get_clock(char * name)
+{
+
+	if (unlikely(list_empty(&clocksource_list)))
+		return &clocksource_jiffies;
+
+	if (!name)
+		return list_entry(clocksource_list.next,
+				  struct clocksource, list);
+
+	return __is_registered(name);
+}
+
+/**
+ * clocksource_get_clock - Finds a specific clocksource
+ * @name:		name of the clocksource to return
+ *
+ * Returns the clocksource if registered, zero otherwise.
+ */
+struct clocksource * clocksource_get_clock(char * name)
+{
+	struct clocksource * ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocksource_lock, flags);
+	ret = __get_clock(name);
+	spin_unlock_irqrestore(&clocksource_lock, flags);
+	return ret;
+}
+
+
+/**
+ * select_clocksource - Finds the best registered clocksource.
+ *
+ * Private function. Must hold clocksource_lock when called.
+ *
+ * Looks through the list of registered clocksources, returning
+ * the one with the highest rating value. If there is a clocksource
+ * name that matches the override string, it returns that clocksource.
+ */
+static struct clocksource *select_clocksource(void)
+{
+	if (!*override_name)
+		return list_entry(clocksource_list.next,
+				  struct clocksource, list);
+
+	return __get_clock(override_name);
+}
+/*
+ * __sorted_list_add - Sorted clocksource add
+ * @c:			clocksource to add
+ *
+ * Adds a clocksource to the clocksource_list in sorted order.
+ */
+static void __sorted_list_add(struct clocksource *c)
+{
+	struct list_head *tmp;
+
+	list_for_each(tmp, &clocksource_list) {
+		struct clocksource *src;
+
+		src = list_entry(tmp, struct clocksource, list);
+
+		if (c->rating > src->rating) {
+			list_add_tail(&c->list, &src->list);
+			return;
+		}
+	}
+
+	/*
+	 * If it's bigger/smaller than all the other entries put it
+	 * at the end.
+	 */
+	list_add_tail(&c->list, &clocksource_list);
+}
+
+/**
  * clocksource_register - Used to install new clocksources
  * @t:		clocksource to be registered
  *
- * Returns -EBUSY if registration fails, zero otherwise.
+ * Returns -EBUSY clock is already registered,
+ * Returns -EINVAL if clocksource is invalid,
+ * Return zero otherwise.
  */
 int clocksource_register(struct clocksource *c)
 {
@@ -83,14 +193,13 @@ int clocksource_register(struct clocksou
 	unsigned long flags;
 
 	spin_lock_irqsave(&clocksource_lock, flags);
-	/* check if clocksource is already registered */
-	if (is_registered_source(c)) {
-		printk("register_clocksource: Cannot register %s. "
+	if (unlikely(!list_empty(&c->list) && __is_registered(c->name))) {
+		printk("register_clocksource: Cannot register %s clocksource. "
 		       "Already registered!", c->name);
 		ret = -EBUSY;
 	} else {
-		/* register it */
- 		list_add(&c->list, &clocksource_list);
+		INIT_LIST_HEAD(&c->list);
+ 		__sorted_list_add(c);
 		/* scan the registered clocksources, and pick the best one */
 		next_clocksource = select_clocksource();
 	}
@@ -100,21 +209,31 @@ int clocksource_register(struct clocksou
 EXPORT_SYMBOL(clocksource_register);
 
 /**
- * clocksource_reselect - Rescan list for next clocksource
+ * clocksource_rating_change - Allows dynamic rating changes for register
+ *                           clocksources.
  *
- * A quick helper function to be used if a clocksource changes its
- * rating. Forces the clocksource list to be re-scanned for the best
- * clocksource.
+ * Signals that a clocksource is dynamically changing it's rating.
+ * This could happen if a clocksource becomes more/less stable.
  */
-void clocksource_reselect(void)
+void clocksource_rating_change(struct clocksource *c)
 {
 	unsigned long flags;
 
+	if (unlikely(list_empty(&c->list)))
+		return;
+
 	spin_lock_irqsave(&clocksource_lock, flags);
+
+	/*
+	 * Re-register the clocksource with it's new rating.
+	 */
+	list_del_init(&c->list);
+	__sorted_list_add(c);
+
 	next_clocksource = select_clocksource();
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
-EXPORT_SYMBOL(clocksource_reselect);
+EXPORT_SYMBOL(clocksource_rating_change);
 
 /**
  * sysfs_show_current_clocksources - sysfs interface for current clocksource
@@ -179,7 +298,8 @@ static ssize_t sysfs_override_clocksourc
  * @dev:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
- * Provides sysfs interface for listing registered clocksources
+ * Provides sysfs interface for listing registered clocksources.
+ * Output in priority sorted order.
  */
 static ssize_t
 sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
@@ -277,62 +397,3 @@ static int __init boot_override_clock(ch
 }
 
 __setup("clock=", boot_override_clock);
-
-/**
- * select_clocksource - Finds the best registered clocksource.
- *
- * Private function. Must hold clocksource_lock when called.
- *
- * Looks through the list of registered clocksources, returning
- * the one with the highest rating value. If there is a clocksource
- * name that matches the override string, it returns that clocksource.
- */
-static struct clocksource *select_clocksource(void)
-{
-	struct clocksource *best = NULL;
-	struct list_head *tmp;
-
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *src;
-
-		src = list_entry(tmp, struct clocksource, list);
-		if (!best)
-			best = src;
-
-		/* check for override: */
-		if (strlen(src->name) == strlen(override_name) &&
-		    !strcmp(src->name, override_name)) {
-			best = src;
-			break;
-		}
-		/* pick the highest rating: */
-		if (src->rating > best->rating)
-		 	best = src;
-	}
-
-	return best;
-}
-
-/**
- * is_registered_source - Checks if clocksource is registered
- * @c:		pointer to a clocksource
- *
- * Private helper function. Must hold clocksource_lock when called.
- *
- * Returns one if the clocksource is already registered, zero otherwise.
- */
-static int is_registered_source(struct clocksource *c)
-{
-	int len = strlen(c->name);
-	struct list_head *tmp;
-
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *src;
-
-		src = list_entry(tmp, struct clocksource, list);
-		if (strlen(src->name) == len &&	!strcmp(src->name, c->name))
-			return 1;
-	}
-
-	return 0;
-}

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 05/10] -mm: clocksource: convert generic timeofday
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (3 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 04/10] -mm: clocksource: add some new " Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-07 23:04   ` Oleg Verych
  2006-10-09 19:39   ` john stultz
  2006-10-06 18:54 ` [PATCH 06/10] -mm: clocksource: add block notifier Daniel Walker
                   ` (5 subsequent siblings)
  10 siblings, 2 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_more_generic.patch --]
[-- Type: text/plain, Size: 16278 bytes --]

Delete alot of remaining code in kernel/time/clocksource.c that
is replaced with this patch. Removed the deprecated "clock" kernel
parameter. 

Shifts some of the code around so that the time of day override 
happens inside kernel/timer.c.

The biggest timeofday changes are in update_wall_time() and
change_clocksource(). I removed the unconditional call to 
change_clocksource(), and replaced it with a single atomic
check. The atomic is asserted only when a clock change is
needed. update_callback is no longer driven from 
update_wall_time().

The fast path is now a single atomic check.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 include/linux/clocksource.h |    3 
 kernel/time/clocksource.c   |  216 ++++++--------------------------------------
 kernel/timer.c              |  167 +++++++++++++++++++++++++++++-----
 3 files changed, 178 insertions(+), 208 deletions(-)

Index: linux-2.6.18/include/linux/clocksource.h
===================================================================
--- linux-2.6.18.orig/include/linux/clocksource.h
+++ linux-2.6.18/include/linux/clocksource.h
@@ -205,7 +205,8 @@ static inline void clocksource_calculate
 
 
 /* used to install a new clocksource */
-extern struct clocksource *clocksource_get_next(void);
+extern int clocksource_sysfs_register(struct sysdev_attribute*);
+extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
 extern int clocksource_register(struct clocksource*);
 extern void clocksource_rating_change(struct clocksource*);
 extern struct clocksource * clocksource_get_clock(char*);
Index: linux-2.6.18/kernel/time/clocksource.c
===================================================================
--- linux-2.6.18.orig/kernel/time/clocksource.c
+++ linux-2.6.18/kernel/time/clocksource.c
@@ -5,6 +5,8 @@
  *
  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
  *
+ * Copyright (C) 2006 MontaVista Daniel Walker (dwalker@mvista.com)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -21,7 +23,6 @@
  *
  * TODO WishList:
  *   o Allow clocksource drivers to be unregistered
- *   o get rid of clocksource_jiffies extern
  */
 
 #include <linux/clocksource.h>
@@ -29,45 +30,15 @@
 #include <linux/init.h>
 #include <linux/module.h>
 
-/* XXX - Would like a better way for initializing curr_clocksource */
-extern struct clocksource clocksource_jiffies;
-
 /*[Clocksource internal variables]---------
- * curr_clocksource:
- *	currently selected clocksource. Initialized to clocksource_jiffies.
- * next_clocksource:
- *	pending next selected clocksource.
  * clocksource_list:
  *	priority list with the registered clocksources
  * clocksource_lock:
- *	protects manipulations to curr_clocksource and next_clocksource
- *	and the clocksource_list
- * override_name:
- *	Name of the user-specified clocksource.
+ * 	protects manipulations to the clocksource_list
  */
-static struct clocksource *curr_clocksource = &clocksource_jiffies;
-static struct clocksource *next_clocksource;
-static struct list_head clocksource_list = LIST_HEAD_INIT(clocksource_list);
+static __read_mostly
+struct list_head clocksource_list = LIST_HEAD_INIT(clocksource_list);
 static DEFINE_SPINLOCK(clocksource_lock);
-static char override_name[32];
-
-/**
- * clocksource_get_next - Returns the selected clocksource
- *
- */
-struct clocksource *clocksource_get_next(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&clocksource_lock, flags);
-	if (next_clocksource) {
-		curr_clocksource = next_clocksource;
-		next_clocksource = NULL;
-	}
-	spin_unlock_irqrestore(&clocksource_lock, flags);
-
-	return curr_clocksource;
-}
 
 /**
  * __is_registered - Returns a clocksource if it's registered
@@ -95,28 +66,6 @@ static struct clocksource * __is_registe
 }
 
 /**
- * __get_clock - Finds a specific clocksource
- * @name:		name of the clocksource to return
- *
- * Private function. Must hold clocksource_lock when called.
- *
- * Returns the clocksource if registered, zero otherwise.
- * If the @name is null the highest rated clock is returned.
- */
-static inline struct clocksource * __get_clock(char * name)
-{
-
-	if (unlikely(list_empty(&clocksource_list)))
-		return &clocksource_jiffies;
-
-	if (!name)
-		return list_entry(clocksource_list.next,
-				  struct clocksource, list);
-
-	return __is_registered(name);
-}
-
-/**
  * clocksource_get_clock - Finds a specific clocksource
  * @name:		name of the clocksource to return
  *
@@ -128,29 +77,17 @@ struct clocksource * clocksource_get_clo
 	unsigned long flags;
 
 	spin_lock_irqsave(&clocksource_lock, flags);
-	ret = __get_clock(name);
+	if (unlikely(list_empty(&clocksource_list)))
+		ret = &clocksource_jiffies;
+	else if (!name)
+		ret = list_entry(clocksource_list.next,
+				 struct clocksource, list);
+	else
+		ret = __is_registered(name);
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 	return ret;
 }
 
-
-/**
- * select_clocksource - Finds the best registered clocksource.
- *
- * Private function. Must hold clocksource_lock when called.
- *
- * Looks through the list of registered clocksources, returning
- * the one with the highest rating value. If there is a clocksource
- * name that matches the override string, it returns that clocksource.
- */
-static struct clocksource *select_clocksource(void)
-{
-	if (!*override_name)
-		return list_entry(clocksource_list.next,
-				  struct clocksource, list);
-
-	return __get_clock(override_name);
-}
 /*
  * __sorted_list_add - Sorted clocksource add
  * @c:			clocksource to add
@@ -200,8 +137,6 @@ int clocksource_register(struct clocksou
 	} else {
 		INIT_LIST_HEAD(&c->list);
  		__sorted_list_add(c);
-		/* scan the registered clocksources, and pick the best one */
-		next_clocksource = select_clocksource();
 	}
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 	return ret;
@@ -230,70 +165,12 @@ void clocksource_rating_change(struct cl
 	list_del_init(&c->list);
 	__sorted_list_add(c);
 
-	next_clocksource = select_clocksource();
+	/* XXX: Add block notifier to signal new rating */
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 EXPORT_SYMBOL(clocksource_rating_change);
 
 /**
- * sysfs_show_current_clocksources - sysfs interface for current clocksource
- * @dev:	unused
- * @buf:	char buffer to be filled with clocksource list
- *
- * Provides sysfs interface for listing current clocksource.
- */
-static ssize_t
-sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
-{
-	char *curr = buf;
-
-	spin_lock_irq(&clocksource_lock);
-	curr += sprintf(curr, "%s ", curr_clocksource->name);
-	spin_unlock_irq(&clocksource_lock);
-
-	curr += sprintf(curr, "\n");
-
-	return curr - buf;
-}
-
-/**
- * sysfs_override_clocksource - interface for manually overriding clocksource
- * @dev:	unused
- * @buf:	name of override clocksource
- * @count:	length of buffer
- *
- * Takes input from sysfs interface for manually overriding the default
- * clocksource selction.
- */
-static ssize_t sysfs_override_clocksource(struct sys_device *dev,
-					  const char *buf, size_t count)
-{
-	size_t ret = count;
-	/* strings from sysfs write are not 0 terminated! */
-	if (count >= sizeof(override_name))
-		return -EINVAL;
-
-	/* strip of \n: */
-	if (buf[count-1] == '\n')
-		count--;
-	if (count < 1)
-		return -EINVAL;
-
-	spin_lock_irq(&clocksource_lock);
-
-	/* copy the name given: */
-	memcpy(override_name, buf, count);
-	override_name[count] = 0;
-
-	/* try to select it: */
-	next_clocksource = select_clocksource();
-
-	spin_unlock_irq(&clocksource_lock);
-
-	return ret;
-}
-
-/**
  * sysfs_show_available_clocksources - sysfs interface for listing clocksource
  * @dev:	unused
  * @buf:	char buffer to be filled with clocksource list
@@ -322,11 +199,8 @@ sysfs_show_available_clocksources(struct
 }
 
 /*
- * Sysfs setup bits:
+ * Generic sysfs setup bits:
  */
-static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
-		   sysfs_override_clocksource);
-
 static SYSDEV_ATTR(available_clocksource, 0600,
 		   sysfs_show_available_clocksources, NULL);
 
@@ -339,6 +213,21 @@ static struct sys_device device_clocksou
 	.cls	= &clocksource_sysclass,
 };
 
+/**
+ * clocksource_sysfs_register - interface to register a sysfs
+ *				hook under the clocksource sys_device.
+ * @attr:	sysdev_attribute created with the SYSDEV_ATTR macro.
+ *
+ * This functions should be used to create a sysfs file under
+ * the clocksource directory which will be used to show the current
+ * clock used by the code calling clocksource_sysfs_register(), and
+ * set a specific overide when written to.
+ */
+int clocksource_sysfs_register(struct sysdev_attribute * attr)
+{
+	return sysdev_create_file(&device_clocksource, attr);
+}
+
 static int __init init_clocksource_sysfs(void)
 {
 	int error = sysdev_class_register(&clocksource_sysclass);
@@ -348,52 +237,7 @@ static int __init init_clocksource_sysfs
 	if (!error)
 		error = sysdev_create_file(
 				&device_clocksource,
-				&attr_current_clocksource);
-	if (!error)
-		error = sysdev_create_file(
-				&device_clocksource,
 				&attr_available_clocksource);
 	return error;
 }
-
-device_initcall(init_clocksource_sysfs);
-
-/**
- * boot_override_clocksource - boot clock override
- * @str:	override name
- *
- * Takes a clocksource= boot argument and uses it
- * as the clocksource override name.
- */
-static int __init boot_override_clocksource(char* str)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&clocksource_lock, flags);
-	if (str)
-		strlcpy(override_name, str, sizeof(override_name));
-	spin_unlock_irqrestore(&clocksource_lock, flags);
-	return 1;
-}
-
-__setup("clocksource=", boot_override_clocksource);
-
-/**
- * boot_override_clock - Compatibility layer for deprecated boot option
- * @str:	override name
- *
- * DEPRECATED! Takes a clock= boot argument and uses it
- * as the clocksource override name
- */
-static int __init boot_override_clock(char* str)
-{
-	if (!strcmp(str, "pmtmr")) {
-		printk("Warning: clock=pmtmr is deprecated. "
-			"Use clocksource=acpi_pm.\n");
-		return boot_override_clocksource("acpi_pm");
-	}
-	printk("Warning! clock= boot option is deprecated. "
-		"Use clocksource=xyz\n");
-	return boot_override_clocksource(str);
-}
-
-__setup("clock=", boot_override_clock);
+postcore_initcall(init_clocksource_sysfs);
Index: linux-2.6.18/kernel/timer.c
===================================================================
--- linux-2.6.18.orig/kernel/timer.c
+++ linux-2.6.18/kernel/timer.c
@@ -17,6 +17,8 @@
  *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
  *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
  *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
+ *  2006-08-03  Added usage of the generic clocksource API
+ *				Copyright (C) 2006 MontaVista, Daniel Walker
  */
 
 #include <linux/kernel_stat.h>
@@ -578,9 +580,15 @@ EXPORT_SYMBOL(xtime);
 
 /* XXX - all of this timekeeping code should be later moved to time.c */
 #include <linux/clocksource.h>
-static struct clocksource *clock; /* pointer to current clocksource */
+/* pointer to current clocksource */
+static struct clocksource *clock = &clocksource_jiffies;
+static char clock_override_name[32];
+
+/* Interrupt update singaling variables */
+static atomic_t clock_check = ATOMIC_INIT(0);
 
 #ifdef CONFIG_GENERIC_TIME
+
 /**
  * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
  *
@@ -700,29 +708,119 @@ EXPORT_SYMBOL(do_settimeofday);
  *
  * Accumulates current time interval and initializes new clocksource
  */
-static int change_clocksource(void)
+static int change_clocksource(char * override)
 {
-	struct clocksource *new;
-	cycle_t now;
 	u64 nsec;
-	new = clocksource_get_next();
-	if (clock != new) {
-		now = clocksource_read(new);
-		nsec =  __get_nsec_offset();
-		timespec_add_ns(&xtime, nsec);
-
-		clock = new;
-		clock->cycle_last = now;
-		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-		       clock->name);
-		return 1;
-	} else if (clock->update_callback) {
-		return clock->update_callback();
+	cycle_t now;
+	struct clocksource *new = clocksource_get_clock(override);
+
+	now = clocksource_read(new);
+	nsec =  __get_nsec_offset();
+	timespec_add_ns(&xtime, nsec);
+
+	clock = new;
+	clock->cycle_last = now;
+	printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+	       clock->name);
+
+	return 1;
+}
+
+/**
+ * sysfs_show_current_clocksources - sysfs interface for current clocksource
+ * @dev:	unused
+ * @buf:	char buffer to be filled with clocksource list
+ *
+ * Provides sysfs interface for listing the current clocksource.
+ * Locking handled inside sysfs.
+ */
+static ssize_t
+sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
+{
+	return sprintf(buf, "%s \n", clock->name);
+}
+
+/**
+ * sysfs_override_clocksource - interface for manually overriding clocksource
+ * @dev:	unused
+ * @buf:	name of override clocksource
+ * @count:	length of buffer
+ *
+ * Takes input from sysfs interface for manually overriding the default
+ * clocksource selction. Locking handled inside sysfs
+ */
+static ssize_t sysfs_override_clocksource(struct sys_device *dev,
+					  const char *buf, size_t count)
+{
+	size_t ret = count;
+
+	/*
+	 * If there's already an update in progress then
+	 * we can't proceed.
+	 */
+	if (atomic_read(&clock_check))
+		return -EINVAL;
+
+	/* strings from sysfs write are not 0 terminated! */
+	if (count >= sizeof(clock_override_name))
+		return -EINVAL;
+
+	/* strip of \n: */
+	if (buf[count-1] == '\n')
+		count--;
+	if (count < 1)
+		return -EINVAL;
+
+	/* copy the name given: */
+	memcpy(clock_override_name, buf, count);
+	clock_override_name[count] = 0;
+
+	if (!clocksource_get_clock(clock_override_name)) {
+		clock_override_name[0] = 0;
+		return  -EINVAL;
 	}
-	return 0;
+
+	atomic_inc(&clock_check);
+
+	return ret;
+}
+
+/*
+ * Sysfs atrribure structure.
+ */
+static SYSDEV_ATTR(timeofday_clocksource, 0600, sysfs_show_current_clocksources,
+		   sysfs_override_clocksource);
+
+/**
+ * boot_override_clocksource - boot clock override
+ * @str:	override name
+ *
+ * Takes a clocksource= boot argument and uses it
+ * as the clocksource override name.
+ */
+static int __init boot_override_clocksource(char* str)
+{
+	if (str) {
+		/*
+		 * Make sure the clock exists.
+		 */
+		if (clocksource_get_clock(str))
+			strlcpy(clock_override_name, str,
+				sizeof(clock_override_name));
+		else {
+			printk("Time: requested clock \"%s\" doesn't exist\n",
+			       str);
+			return 0;
+		}
+
+	}
+
+	return 1;
 }
+__setup("timeofday_clocksource=", boot_override_clocksource);
+
 #else
-#define change_clocksource()	do { 0; } while(0)
+#define change_clocksource(x)	do { } while(0)
 #endif
 
 /**
@@ -754,7 +852,7 @@ void __init timekeeping_init(void)
 
 	ntp_clear();
 
-	clock = clocksource_get_next();
+	clock = clocksource_get_best_clock();
 	clocksource_calculate_interval(clock, tick_nsec);
 	clock->cycle_last = clocksource_read(clock);
 
@@ -811,6 +909,17 @@ static int __init timekeeping_init_devic
 	int error = sysdev_class_register(&timekeeping_sysclass);
 	if (!error)
 		error = sysdev_register(&device_timer);
+
+#ifdef CONFIG_GENERIC_TIME
+	clocksource_sysfs_register(&attr_timeofday_clocksource);
+
+	/*
+	 * All the clocks should be registered at this point,
+	 * so it's safe to trigger a switch to a higher res
+	 * clock.
+	 */
+	atomic_inc(&clock_check);
+#endif
 	return error;
 }
 
@@ -952,10 +1061,26 @@ static void update_wall_time(void)
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
-	if (change_clocksource()) {
+	if (unlikely(atomic_read(&clock_check))) {
+
+		/*
+		 * Switch to the new override clock, or the highest
+		 * rated clock.
+		 */
+		if (*clock_override_name)
+			change_clocksource(clock_override_name);
+		else
+			change_clocksource(NULL);
+
 		clock->error = 0;
 		clock->xtime_nsec = 0;
 		clocksource_calculate_interval(clock, tick_nsec);
+
+		/*
+		 * Remove the change signal
+		 */
+		atomic_dec(&clock_check);
+
 	}
 }
 

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 06/10] -mm: clocksource: add block notifier
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (4 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 05/10] -mm: clocksource: convert generic timeofday Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-09 19:45   ` john stultz
  2006-10-06 18:54 ` [PATCH 07/10] -mm: clocksource: remove update_callback Daniel Walker
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_add_block_notify_on_new_clock.patch --]
[-- Type: text/plain, Size: 3722 bytes --]

Adds a call back interface for register/rating change
events.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 include/linux/clocksource.h |    8 ++++++++
 kernel/time/clocksource.c   |   19 ++++++++++++++++++-
 kernel/timer.c              |   12 ++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

Index: linux-2.6.18/include/linux/clocksource.h
===================================================================
--- linux-2.6.18.orig/include/linux/clocksource.h
+++ linux-2.6.18/include/linux/clocksource.h
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/plist.h>
 #include <linux/sysdev.h>
+#include <linux/notifier.h>
 #include <asm/div64.h>
 #include <asm/io.h>
 
@@ -26,6 +27,12 @@ typedef u64 cycle_t;
  */
 extern struct clocksource clocksource_jiffies;
 
+/*
+ * Block notifier flags.
+ */
+#define CLOCKSOURCE_NOTIFY_REGISTER	1
+#define CLOCKSOURCE_NOTIFY_RATING	2
+
 /**
  * struct clocksource - hardware abstraction for a free running counter
  *	Provides mostly state-free accessors to the underlying hardware.
@@ -205,6 +212,7 @@ static inline void clocksource_calculate
 
 
 /* used to install a new clocksource */
+extern int clocksource_notifier_register(struct notifier_block*);
 extern int clocksource_sysfs_register(struct sysdev_attribute*);
 extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
 extern int clocksource_register(struct clocksource*);
Index: linux-2.6.18/kernel/time/clocksource.c
===================================================================
--- linux-2.6.18.orig/kernel/time/clocksource.c
+++ linux-2.6.18/kernel/time/clocksource.c
@@ -39,6 +39,18 @@
 static __read_mostly
 struct list_head clocksource_list = LIST_HEAD_INIT(clocksource_list);
 static DEFINE_SPINLOCK(clocksource_lock);
+static ATOMIC_NOTIFIER_HEAD(clocksource_list_notifier);
+
+/**
+ * clocksource_notifier_register - Registers a list change notifier
+ * @nb:		pointer to a notifier block
+ *
+ * Returns zero always.
+ */
+int clocksource_notifier_register(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&clocksource_list_notifier, nb);
+}
 
 /**
  * __is_registered - Returns a clocksource if it's registered
@@ -139,6 +151,9 @@ int clocksource_register(struct clocksou
  		__sorted_list_add(c);
 	}
 	spin_unlock_irqrestore(&clocksource_lock, flags);
+
+	atomic_notifier_call_chain(&clocksource_list_notifier,
+ 				   CLOCKSOURCE_NOTIFY_REGISTER, c);
 	return ret;
 }
 EXPORT_SYMBOL(clocksource_register);
@@ -165,7 +180,9 @@ void clocksource_rating_change(struct cl
 	list_del_init(&c->list);
 	__sorted_list_add(c);
 
-	/* XXX: Add block notifier to signal new rating */
+	atomic_notifier_call_chain(&clocksource_list_notifier,
+				   CLOCKSOURCE_NOTIFY_RATING, c);
+
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 EXPORT_SYMBOL(clocksource_rating_change);
Index: linux-2.6.18/kernel/timer.c
===================================================================
--- linux-2.6.18.orig/kernel/timer.c
+++ linux-2.6.18/kernel/timer.c
@@ -819,6 +819,17 @@ static int __init boot_override_clocksou
 }
 __setup("timeofday_clocksource=", boot_override_clocksource);
 
+static int
+clocksource_callback(struct notifier_block *nb, unsigned long op, void *c)
+{
+	atomic_inc(&clock_check);
+	return 0;
+}
+
+static struct notifier_block clocksource_nb = {
+	.notifier_call = clocksource_callback,
+};
+
 #else
 #define change_clocksource(x)	do { } while(0)
 #endif
@@ -912,6 +923,7 @@ static int __init timekeeping_init_devic
 
 #ifdef CONFIG_GENERIC_TIME
 	clocksource_sysfs_register(&attr_timeofday_clocksource);
+	clocksource_notifier_register(&clocksource_nb);
 
 	/*
 	 * All the clocks should be registered at this point,

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 07/10] -mm: clocksource: remove update_callback
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (5 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 06/10] -mm: clocksource: add block notifier Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-09 19:56   ` john stultz
  2006-10-06 18:54 ` [PATCH 08/10] -mm: clocksource: initialize list value Daniel Walker
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksource_remove_update_callback.patch --]
[-- Type: text/plain, Size: 4394 bytes --]

Inorporated John's comment of moving the rating change code
into mark_tsc_unstable() , then I renamed tsc_update_callback()
to tsc_update_khz() and added a call to it into places that change
the tsc_khz variable.

With the new notifier block the update_callback becomes
obsolete.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 arch/i386/kernel/tsc.c      |   54 ++++++++++++++++++++++----------------------
 include/linux/clocksource.h |    2 -
 2 files changed, 28 insertions(+), 28 deletions(-)

Index: linux-2.6.18/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.18/arch/i386/kernel/tsc.c
@@ -50,8 +50,7 @@ static int __init tsc_setup(char *str)
 __setup("notsc", tsc_setup);
 
 /*
- * code to mark and check if the TSC is unstable
- * due to cpufreq or due to unsynced TSCs
+ * Flag that denotes an unstable tsc and check function.
  */
 static int tsc_unstable;
 
@@ -60,12 +59,6 @@ static inline int check_tsc_unstable(voi
 	return tsc_unstable;
 }
 
-void mark_tsc_unstable(void)
-{
-	tsc_unstable = 1;
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
 /* Accellerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
  *  basic equation:
@@ -171,6 +164,21 @@ err:
 	return 0;
 }
 
+#if !defined(CONFIG_SMP) || defined(CONFIG_CPU_FREQ)
+static struct clocksource clocksource_tsc;
+static unsigned long current_tsc_khz;
+static void tsc_update_khz(void)
+{
+	/* only update if tsc_khz has changed: */
+	if (current_tsc_khz != tsc_khz) {
+		current_tsc_khz = tsc_khz;
+		clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+							clocksource_tsc.shift);
+	}
+
+}
+#endif
+
 int recalibrate_cpu_khz(void)
 {
 #ifndef CONFIG_SMP
@@ -179,6 +187,7 @@ int recalibrate_cpu_khz(void)
 	if (cpu_has_tsc) {
 		cpu_khz = calculate_cpu_khz();
 		tsc_khz = cpu_khz;
+		tsc_update_khz();
 		cpu_data[0].loops_per_jiffy =
 			cpufreq_scale(cpu_data[0].loops_per_jiffy,
 					cpu_khz_old, cpu_khz);
@@ -282,6 +291,7 @@ time_cpufreq_notifier(struct notifier_bl
 						ref_freq, freq->new);
 			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
 				tsc_khz = cpu_khz;
+				tsc_update_khz();
 				set_cyc2ns_scale(cpu_khz);
 				/*
 				 * TSC based sched_clock turns
@@ -322,7 +332,6 @@ core_initcall(cpufreq_tsc);
 /* clock source code */
 
 static unsigned long current_tsc_khz = 0;
-static int tsc_update_callback(void);
 
 static cycle_t read_tsc(void)
 {
@@ -340,31 +349,24 @@ static struct clocksource clocksource_ts
 	.mask			= CLOCKSOURCE_MASK(64),
 	.mult			= 0, /* to be set */
 	.shift			= 22,
-	.update_callback	= tsc_update_callback,
 	.is_continuous		= 1,
 };
 
-static int tsc_update_callback(void)
-{
-	int change = 0;
 
-	/* check to see if we should switch to the safe clocksource: */
-	if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
+/*
+ * code to mark if the TSC is unstable
+ * due to cpufreq or due to unsynced TSCs
+ */
+void mark_tsc_unstable(void)
+{
+	if (unlikely(!tsc_unstable)) {
 		clocksource_tsc.rating = 50;
 		clocksource_rating_change(&clocksource_tsc);
-		change = 1;
 	}
-
-	/* only update if tsc_khz has changed: */
-	if (current_tsc_khz != tsc_khz) {
-		current_tsc_khz = tsc_khz;
-		clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
-							clocksource_tsc.shift);
-		change = 1;
-	}
-
-	return change;
+	tsc_unstable = 1;
 }
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
 
 static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
 {
Index: linux-2.6.18/include/linux/clocksource.h
===================================================================
--- linux-2.6.18.orig/include/linux/clocksource.h
+++ linux-2.6.18/include/linux/clocksource.h
@@ -59,7 +59,6 @@ extern struct clocksource clocksource_ji
  *			subtraction of non 64 bit counters
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
- * @update_callback:	called when safe to alter clocksource values
  * @is_continuous:	defines if clocksource is free-running.
  * @cycle_interval:	Used internally by timekeeping core, please ignore.
  * @xtime_interval:	Used internally by timekeeping core, please ignore.
@@ -72,7 +71,6 @@ struct clocksource {
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
-	int (*update_callback)(void);
 	int is_continuous;
 
 	/* timekeeping specific data, ignore */

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 08/10] -mm: clocksource: initialize list value
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (6 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 07/10] -mm: clocksource: remove update_callback Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-09 19:59   ` john stultz
  2006-10-06 18:54 ` [PATCH 09/10] -mm: clocksource: add generic sched_clock() Daniel Walker
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: clocksouce_list_init.patch --]
[-- Type: text/plain, Size: 3803 bytes --]

A change to clocksource initialization. It's optional for new clocksources,
but prefered. If the list field is initialized it allows clocksource_register 
to complete faster since it doesn't have the scan the list of clocks doing 
strcmp on each.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 arch/i386/kernel/hpet.c          |    1 +
 arch/i386/kernel/i8253.c         |    1 +
 arch/i386/kernel/tsc.c           |    1 +
 drivers/clocksource/acpi_pm.c    |    1 +
 drivers/clocksource/cyclone.c    |    1 +
 drivers/clocksource/scx200_hrt.c |    1 +
 include/linux/clocksource.h      |    3 +++
 7 files changed, 9 insertions(+)

Index: linux-2.6.18/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.18/arch/i386/kernel/hpet.c
@@ -27,6 +27,7 @@ static struct clocksource clocksource_hp
 	.mult		= 0, /* set below */
 	.shift		= HPET_SHIFT,
 	.is_continuous	= 1,
+	.list		= CLOCKSOURCE_LIST_INIT(clocksource_hpet.list),
 };
 
 static int __init init_hpet_clocksource(void)
Index: linux-2.6.18/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/i8253.c
+++ linux-2.6.18/arch/i386/kernel/i8253.c
@@ -105,6 +105,7 @@ static struct clocksource clocksource_pi
 	.mask	= CLOCKSOURCE_MASK(32),
 	.mult	= 0,
 	.shift	= 20,
+	.list	= CLOCKSOURCE_LIST_INIT(clocksource_pit.list),
 };
 
 static int __init init_pit_clocksource(void)
Index: linux-2.6.18/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.18/arch/i386/kernel/tsc.c
@@ -350,6 +350,7 @@ static struct clocksource clocksource_ts
 	.mult			= 0, /* to be set */
 	.shift			= 22,
 	.is_continuous		= 1,
+	.list			= CLOCKSOURCE_LIST_INIT(clocksource_tsc.list),
 };
 
 
Index: linux-2.6.18/drivers/clocksource/acpi_pm.c
===================================================================
--- linux-2.6.18.orig/drivers/clocksource/acpi_pm.c
+++ linux-2.6.18/drivers/clocksource/acpi_pm.c
@@ -73,6 +73,7 @@ static struct clocksource clocksource_ac
 	.mult		= 0, /*to be caluclated*/
 	.shift		= 22,
 	.is_continuous	= 1,
+	.list		= CLOCKSOURCE_LIST_INIT(clocksource_acpi_pm.list),
 };
 
 
Index: linux-2.6.18/drivers/clocksource/cyclone.c
===================================================================
--- linux-2.6.18.orig/drivers/clocksource/cyclone.c
+++ linux-2.6.18/drivers/clocksource/cyclone.c
@@ -32,6 +32,7 @@ static struct clocksource clocksource_cy
 	.mult		= 10,
 	.shift		= 0,
 	.is_continuous	= 1,
+	.list		= CLOCKSOURCE_LIST_INIT(clocksource_cyclone.list),
 };
 
 static int __init init_cyclone_clocksource(void)
Index: linux-2.6.18/drivers/clocksource/scx200_hrt.c
===================================================================
--- linux-2.6.18.orig/drivers/clocksource/scx200_hrt.c
+++ linux-2.6.18/drivers/clocksource/scx200_hrt.c
@@ -58,6 +58,7 @@ static struct clocksource cs_hrt = {
 	.read		= read_hrt,
 	.mask		= CLOCKSOURCE_MASK(32),
 	.is_continuous	= 1,
+	.list		= CLOCKSOURCE_LIST_INIT(cs_hrt.list),
 	/* mult, shift are set based on mhz27 flag */
 };
 
Index: linux-2.6.18/include/linux/clocksource.h
===================================================================
--- linux-2.6.18.orig/include/linux/clocksource.h
+++ linux-2.6.18/include/linux/clocksource.h
@@ -82,6 +82,9 @@ struct clocksource {
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1)
 
+/* Abstracted list initialization */
+#define CLOCKSOURCE_LIST_INIT(x)	LIST_HEAD_INIT(x)
+
 /**
  * clocksource_khz2mult - calculates mult from khz and shift
  * @khz:		Clocksource frequency in KHz

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 09/10] -mm: clocksource: add generic sched_clock()
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (7 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 08/10] -mm: clocksource: initialize list value Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-06 18:54 ` [PATCH 10/10] -mm: clocksource: update kernel parameters Daniel Walker
  2006-10-07  1:53 ` [PATCH 00/10] -mm: generic clocksource API -v2 Andrew Morton
  10 siblings, 0 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul, mingo

[-- Attachment #1: add_generic_sched_clock.patch --]
[-- Type: text/plain, Size: 5808 bytes --]

Adds a generic sched_clock, along with a boot time override for the scheduler 
clocksource (sched_clocksource). Hopefully the config option would eventually 
be removed.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 arch/i386/Kconfig      |    4 +++
 arch/i386/kernel/tsc.c |   61 ------------------------------------------------
 kernel/sched.c         |   62 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 61 deletions(-)

Index: linux-2.6.18/arch/i386/Kconfig
===================================================================
--- linux-2.6.18.orig/arch/i386/Kconfig
+++ linux-2.6.18/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config GENERIC_SCHED_CLOCK
+	bool
+	default y
+
 config LOCKDEP_SUPPORT
 	bool
 	default y
Index: linux-2.6.18/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.18/arch/i386/kernel/tsc.c
@@ -59,65 +59,6 @@ static inline int check_tsc_unstable(voi
 	return tsc_unstable;
 }
 
-/* Accellerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better percision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
-	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	unsigned long long this_offset;
-
-	/*
-	 * in the NUMA case we dont use the TSC as they are not
-	 * synchronized across all CPUs.
-	 */
-#ifndef CONFIG_NUMA
-	if (!cpu_khz || check_tsc_unstable())
-#endif
-		/* no locking but a rare wrong value is not a big deal */
-		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-
-	/* read the Time Stamp Counter: */
-	rdtscll(this_offset);
-
-	/* return the value in ns */
-	return cycles_2_ns(this_offset);
-}
-
 static unsigned long calculate_cpu_khz(void)
 {
 	unsigned long long start, end;
@@ -201,7 +142,6 @@ void __init tsc_init(void)
 				(unsigned long)cpu_khz / 1000,
 				(unsigned long)cpu_khz % 1000);
 
-	set_cyc2ns_scale(cpu_khz);
 	use_tsc_delay();
 }
 
@@ -277,7 +217,6 @@ time_cpufreq_notifier(struct notifier_bl
 			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
 				tsc_khz = cpu_khz;
 				tsc_update_khz();
-				set_cyc2ns_scale(cpu_khz);
 				/*
 				 * TSC based sched_clock turns
 				 * to junk w/ cpufreq
Index: linux-2.6.18/kernel/sched.c
===================================================================
--- linux-2.6.18.orig/kernel/sched.c
+++ linux-2.6.18/kernel/sched.c
@@ -16,6 +16,7 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
+ *  2006-08-03	Generic sched_clock() implementation by Daniel Walker
  */
 
 #include <linux/mm.h>
@@ -53,6 +54,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
+#include <linux/clocksource.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -6909,6 +6911,66 @@ int in_sched_functions(unsigned long add
 		&& addr < (unsigned long)__sched_text_end);
 }
 
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+static struct clocksource *sched_clocksource = &clocksource_jiffies;
+static char __initdata sched_clock_override[32];
+
+unsigned long long sched_clock(void)
+{
+	return cyc2ns(sched_clocksource, clocksource_read(sched_clocksource));
+}
+
+static int __init boot_override_sched_clocksource(char* str)
+{
+	if (str)
+		strlcpy(sched_clock_override, str,
+			sizeof(sched_clock_override));
+
+	return 1;
+}
+__setup("sched_clocksource=", boot_override_sched_clocksource);
+
+static int
+sched_clock_callback(struct notifier_block *nb, unsigned long op, void *c)
+{
+	/*
+	 * If our clock just became unstable switch to the safe,
+	 * slow, fast jiffies clock.
+	 *
+	 * XXX : We could just switch to the next best clock.
+	 */
+	if (op == CLOCKSOURCE_NOTIFY_RATING && sched_clocksource == c)
+		sched_clocksource = &clocksource_jiffies;
+	return 0;
+}
+
+static struct notifier_block sched_clock_nb = {
+	.notifier_call = sched_clock_callback,
+};
+
+static int __init sched_clock_init(void)
+{
+	clocksource_notifier_register(&sched_clock_nb);
+
+	if (*sched_clock_override != 0) {
+		sched_clocksource = clocksource_get_clock(sched_clock_override);
+		if (unlikely(sched_clocksource == NULL)) {
+			sched_clocksource = clocksource_get_best_clock();
+			printk(KERN_ERR "Warning: "
+			       "Invalid scheduler clock override.\n");
+			return 1;
+		}
+
+		printk(KERN_INFO "Scheduler: %s clocksource has been "
+		       "installed.\n", sched_clocksource->name);
+	} else
+		sched_clocksource = clocksource_get_best_clock();
+
+	return 0;
+}
+__initcall(sched_clock_init);
+#endif /* CONFIG_GENERIC_SCHED_CLOCK */
+
 void __init sched_init(void)
 {
 	int i, j, k;

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 10/10] -mm: clocksource: update kernel parameters
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (8 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 09/10] -mm: clocksource: add generic sched_clock() Daniel Walker
@ 2006-10-06 18:54 ` Daniel Walker
  2006-10-07  1:53 ` [PATCH 00/10] -mm: generic clocksource API -v2 Andrew Morton
  10 siblings, 0 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-06 18:54 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, johnstul

[-- Attachment #1: add_some_new_clocksource_params.patch --]
[-- Type: text/plain, Size: 1684 bytes --]

Documents two new kernel parameters,

timeofday_clocksource
sched_clocksource

Removed old ones,

clocksource
clock

Signed-Off-By: Daniel Walker <dwalker@mvista.com>

---
 Documentation/kernel-parameters.txt |   17 +++++++----------
 1 files changed, 7 insertions(+), 10 deletions(-)

Index: linux-2.6.18/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.18.orig/Documentation/kernel-parameters.txt
+++ linux-2.6.18/Documentation/kernel-parameters.txt
@@ -351,13 +351,6 @@ and is between 256 and 4096 characters. 
 			Value can be changed at runtime via
 				/selinux/checkreqprot.
 
-	clock=		[BUGS=IA-32, HW] gettimeofday clocksource override.
-			[Deprecated]
-			Forces specified clocksource (if avaliable) to be used
-			when calculating gettimeofday(). If specified
-			clocksource is not avalible, it defaults to PIT.
-			Format: { pit | tsc | cyclone | pmtmr }
-
 	disable_8254_timer
 	enable_8254_timer
 			[IA32/X86_64] Disable/Enable interrupt 0 timer routing
@@ -1647,9 +1640,13 @@ and is between 256 and 4096 characters. 
 
 	time		Show timing data prefixed to each printk message line
 
-	clocksource=	[GENERIC_TIME] Override the default clocksource
-			Override the default clocksource and use the clocksource
-			with the name specified.
+	timeofday_clocksource=	[GENERIC_TIME]
+			Override the default time of day clocksource and use
+			the clocksource with the name specified.
+
+	sched_clocksource=	[GENERIC_TIME]
+			Override the default scheduler clocksource. Used for
+			timing only inside the scheduler.
 
 	tipar.timeout=	[HW,PPT]
 			Set communications timeout in tenths of a second

--


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 00/10] -mm: generic clocksource API -v2
  2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
                   ` (9 preceding siblings ...)
  2006-10-06 18:54 ` [PATCH 10/10] -mm: clocksource: update kernel parameters Daniel Walker
@ 2006-10-07  1:53 ` Andrew Morton
  2006-10-07  3:10   ` Daniel Walker
  10 siblings, 1 reply; 31+ messages in thread
From: Andrew Morton @ 2006-10-07  1:53 UTC (permalink / raw)
  To: Daniel Walker; +Cc: linux-kernel, johnstul, Thomas Gleixner, Ingo Molnar

On Fri, 06 Oct 2006 11:54:39 -0700
Daniel Walker <dwalker@mvista.com> wrote:

> This patch set is meant to modify the clocksource structure and API so that it
> can be used by more than just the timekeeping code. My motivation is mainly
> that I feel the current clocksource interface could be used for much more
> than just timekeeping. So if we keep the clocksource interface (which I think
> we should) then we should get everything out of it that we can.
> 
> This set modifies the API, then converts the time keeping code over to the new
> API. I also added a generic sched_clock() which just re-uses the clocksource
> interface to provide a high quality scheduling clock (assuming a good
> clocksource). Several ARM board just output nanoseconds based on jiffies which
> is still possible with the generic sched_clock().

Well it all applies on top of the hrtimer/dynticks stuff with only a bit of
fixing needed.  And then it compiles.

But there's been so much time-related work happening lately that I'm
inclined to park this work for a while, give the existing changes time to
settle in and get debugged.  And to give people time to review this new
material.  If that review is positive, we can bring this material into
-mm in a week or so.  OK?



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 00/10] -mm: generic clocksource API -v2
  2006-10-07  1:53 ` [PATCH 00/10] -mm: generic clocksource API -v2 Andrew Morton
@ 2006-10-07  3:10   ` Daniel Walker
  0 siblings, 0 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-07  3:10 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, johnstul, Thomas Gleixner, Ingo Molnar

On Fri, 2006-10-06 at 18:53 -0700, Andrew Morton wrote:
> 
> Well it all applies on top of the hrtimer/dynticks stuff with only a bit of
> fixing needed.  And then it compiles.

That makes sense, they should be pretty discrete.

> But there's been so much time-related work happening lately that I'm
> inclined to park this work for a while, give the existing changes time to
> settle in and get debugged.  And to give people time to review this new
> material.  If that review is positive, we can bring this material into
> -mm in a week or so.  OK?

Yeah, I'm not in a rush. Whatever you think is best.

Daniel


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-06 18:54 ` [PATCH 01/10] -mm: clocksource: increase initcall priority Daniel Walker
@ 2006-10-07 15:40   ` OGAWA Hirofumi
  2006-10-07 16:51     ` Daniel Walker
  2006-10-09 18:50   ` john stultz
  1 sibling, 1 reply; 31+ messages in thread
From: OGAWA Hirofumi @ 2006-10-07 15:40 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel, johnstul

Daniel Walker <dwalker@mvista.com> writes:

> Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
> ===================================================================
> --- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
> +++ linux-2.6.17/drivers/clocksource/acpi_pm.c
> @@ -174,4 +174,4 @@ pm_good:
>  	return clocksource_register(&clocksource_acpi_pm);
>  }
>  
> -module_init(init_acpi_pm_clocksource);
> +postcore_initcall(init_acpi_pm_clocksource);

Current code is assumeing DECLARE_PCI_FIXUP_EARLY() is called before
init_acpi_pm_clocksource().

We'll need to change it.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-07 15:40   ` OGAWA Hirofumi
@ 2006-10-07 16:51     ` Daniel Walker
  2006-10-07 18:00       ` OGAWA Hirofumi
  0 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-07 16:51 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: akpm, linux-kernel, johnstul

On Sun, 2006-10-08 at 00:40 +0900, OGAWA Hirofumi wrote:
> Daniel Walker <dwalker@mvista.com> writes:
> 
> > Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
> > ===================================================================
> > --- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
> > +++ linux-2.6.17/drivers/clocksource/acpi_pm.c
> > @@ -174,4 +174,4 @@ pm_good:
> >  	return clocksource_register(&clocksource_acpi_pm);
> >  }
> >  
> > -module_init(init_acpi_pm_clocksource);
> > +postcore_initcall(init_acpi_pm_clocksource);
> 
> Current code is assumeing DECLARE_PCI_FIXUP_EARLY() is called before
> init_acpi_pm_clocksource().
> 
> We'll need to change it.

We can add a call to clocksource_rating_change() inside
acpi_pm_need_workaround(), are there deeper dependencies?

Daniel


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-07 16:51     ` Daniel Walker
@ 2006-10-07 18:00       ` OGAWA Hirofumi
  2006-10-07 18:41         ` OGAWA Hirofumi
  0 siblings, 1 reply; 31+ messages in thread
From: OGAWA Hirofumi @ 2006-10-07 18:00 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel, johnstul

Daniel Walker <dwalker@mvista.com> writes:

> On Sun, 2006-10-08 at 00:40 +0900, OGAWA Hirofumi wrote:
>> Daniel Walker <dwalker@mvista.com> writes:
>> 
>> > Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
>> > ===================================================================
>> > --- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
>> > +++ linux-2.6.17/drivers/clocksource/acpi_pm.c
>> > @@ -174,4 +174,4 @@ pm_good:
>> >  	return clocksource_register(&clocksource_acpi_pm);
>> >  }
>> >  
>> > -module_init(init_acpi_pm_clocksource);
>> > +postcore_initcall(init_acpi_pm_clocksource);
>> 
>> Current code is assumeing DECLARE_PCI_FIXUP_EARLY() is called before
>> init_acpi_pm_clocksource().
>> 
>> We'll need to change it.
>
> We can add a call to clocksource_rating_change() inside
> acpi_pm_need_workaround(), are there deeper dependencies?

There is no deeper dependencies.  If it's meaning
clocksource_reselect() in current git, it sounds good to me.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-07 18:00       ` OGAWA Hirofumi
@ 2006-10-07 18:41         ` OGAWA Hirofumi
  2006-10-07 18:44           ` Daniel Walker
  0 siblings, 1 reply; 31+ messages in thread
From: OGAWA Hirofumi @ 2006-10-07 18:41 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel, johnstul

OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> writes:

> Daniel Walker <dwalker@mvista.com> writes:
>
>> On Sun, 2006-10-08 at 00:40 +0900, OGAWA Hirofumi wrote:
>>> Daniel Walker <dwalker@mvista.com> writes:
>>> 
>>> > Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
>>> > ===================================================================
>>> > --- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
>>> > +++ linux-2.6.17/drivers/clocksource/acpi_pm.c
>>> > @@ -174,4 +174,4 @@ pm_good:
>>> >  	return clocksource_register(&clocksource_acpi_pm);
>>> >  }
>>> >  
>>> > -module_init(init_acpi_pm_clocksource);
>>> > +postcore_initcall(init_acpi_pm_clocksource);
>>> 
>>> Current code is assumeing DECLARE_PCI_FIXUP_EARLY() is called before
>>> init_acpi_pm_clocksource().
>>> 
>>> We'll need to change it.
>>
>> We can add a call to clocksource_rating_change() inside
>> acpi_pm_need_workaround(), are there deeper dependencies?
>
> There is no deeper dependencies.  If it's meaning
> clocksource_reselect() in current git, it sounds good to me.

Ah, I was forgetting why I didn't before. If it's a buggy pmtmr, we'll
get corrupted time until re-selecting the clocksource.

If anybody doesn't care this will be good with it. If not, we would
need to back to old one. IIRC, John did it.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-07 18:41         ` OGAWA Hirofumi
@ 2006-10-07 18:44           ` Daniel Walker
  2006-10-07 19:19             ` OGAWA Hirofumi
  0 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-07 18:44 UTC (permalink / raw)
  To: OGAWA Hirofumi; +Cc: akpm, linux-kernel, johnstul

On Sun, 2006-10-08 at 03:41 +0900, OGAWA Hirofumi wrote:

> >>> 
> >>> We'll need to change it.
> >>
> >> We can add a call to clocksource_rating_change() inside
> >> acpi_pm_need_workaround(), are there deeper dependencies?
> >
> > There is no deeper dependencies.  If it's meaning
> > clocksource_reselect() in current git, it sounds good to me.
> 
> Ah, I was forgetting why I didn't before. If it's a buggy pmtmr, we'll
> get corrupted time until re-selecting the clocksource.
> 
> If anybody doesn't care this will be good with it. If not, we would
> need to back to old one. IIRC, John did it.

We could just reverse it, use the verified read function until we know
it's a good PM timer, then switch to the faster read function.

Daniel


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-07 18:44           ` Daniel Walker
@ 2006-10-07 19:19             ` OGAWA Hirofumi
  0 siblings, 0 replies; 31+ messages in thread
From: OGAWA Hirofumi @ 2006-10-07 19:19 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel, johnstul

Daniel Walker <dwalker@mvista.com> writes:

> On Sun, 2006-10-08 at 03:41 +0900, OGAWA Hirofumi wrote:
>
>> >>> 
>> >>> We'll need to change it.
>> >>
>> >> We can add a call to clocksource_rating_change() inside
>> >> acpi_pm_need_workaround(), are there deeper dependencies?
>> >
>> > There is no deeper dependencies.  If it's meaning
>> > clocksource_reselect() in current git, it sounds good to me.
>> 
>> Ah, I was forgetting why I didn't before. If it's a buggy pmtmr, we'll
>> get corrupted time until re-selecting the clocksource.
>> 
>> If anybody doesn't care this will be good with it. If not, we would
>> need to back to old one. IIRC, John did it.
>
> We could just reverse it, use the verified read function until we know
> it's a good PM timer, then switch to the faster read function.

Yes, we did it in old timer code.
-- 
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 05/10] -mm: clocksource: convert generic timeofday
  2006-10-06 18:54 ` [PATCH 05/10] -mm: clocksource: convert generic timeofday Daniel Walker
@ 2006-10-07 23:04   ` Oleg Verych
  2006-10-09 19:39   ` john stultz
  1 sibling, 0 replies; 31+ messages in thread
From: Oleg Verych @ 2006-10-07 23:04 UTC (permalink / raw)
  To: linux-kernel

Andrew,

On 2006-10-06, Daniel Walker wrote:
[]
> +int clocksource_sysfs_register(struct sysdev_attribute * attr)
[]
>  static int __init init_clocksource_sysfs(void)

it seems kernel/time/clocksource.c doesn't honor CONFIG_SYSFS at
all...

Maybe this option isn't needed any more ? It doesn't exist in
menuconfig, and i couldn't set in `n' manually.

[]
> @@ -17,6 +17,8 @@
>   *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
>   *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
>   *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
> + *  2006-08-03  Added usage of the generic clocksource API
> + *				Copyright (C) 2006 MontaVista, Daniel Walker
>   */

there are comments about having new source management system, thus, logs
on top are not needed any more...
  
-o--=O`C
 #oo'L O
<___=E M



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-06 18:54 ` [PATCH 01/10] -mm: clocksource: increase initcall priority Daniel Walker
  2006-10-07 15:40   ` OGAWA Hirofumi
@ 2006-10-09 18:50   ` john stultz
  2006-10-09 19:24     ` Daniel Walker
  1 sibling, 1 reply; 31+ messages in thread
From: john stultz @ 2006-10-09 18:50 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment (clocksource_init_call.patch)
> Since it's likely that this interface would get used during bootup 
> I moved all the clocksource registration into the postcore initcall. 

So this is still somewhat of an open question: While timekeeping_init
runs quite early, and the timekeeping subsystem and its interface is
usable early in the boot process, currently not all the clocksources are
available as early. This is by design, as there may be clocksource
driver modules loaded later on in the boot process, so we don't want to
require it early.

So, the question becomes: Do we want to start using arch specific
clocksources as early as possible, with the potential that we'll replace
it when a better one shows up later? It would allow for finer grained
timekeeping early in boot, which sounds nice, but I'm not sure how great
the real need is for that.

> This also eliminated some clocksource shuffling during bootup.

Actually, I'm not sure I see this. Which shuffling does it avoid? 

I suspect it might actually cause more shuffling, as some clocksources
(well, just the TSC, really.. its such a pain...) are not disqualified
until later because we don't know if the system will enter C3, or change
cpufreq, etc..  By waiting longer, we increase the chance that those
disqualifying actions will occur before we install it.

thanks
-john


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 02/10] -mm: clocksource: small cleanup
  2006-10-06 18:54 ` [PATCH 02/10] -mm: clocksource: small cleanup Daniel Walker
@ 2006-10-09 18:51   ` john stultz
  0 siblings, 0 replies; 31+ messages in thread
From: john stultz @ 2006-10-09 18:51 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment (clocksource_cleanup.patch)
> Mostly changing alignment. Just some general cleanup.
> 
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>
> Acked-by: John Stultz <johnstul@us.ibm.com>

Just a re-ack. :) This all looks fine.

thanks
-john



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 04/10] -mm: clocksource: add some new API calls
  2006-10-06 18:54 ` [PATCH 04/10] -mm: clocksource: add some new " Daniel Walker
@ 2006-10-09 19:01   ` john stultz
  0 siblings, 0 replies; 31+ messages in thread
From: john stultz @ 2006-10-09 19:01 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment (clocksource_user_api.patch)
> originally used plist but removed it in this patch, and used a sorted list 
> which is just as fast with a lot less memory overhead.
> 
> Introduces some new API calls,
> 
> - clocksource_get_clock()
> 	Allows a clock lookup by name.
> - clocksource_rating_change()
> 	Used by a clocksource to signal a rating change. Replaces 
> 	reselect_clocksource()

Adds ns2cyc interface. Just to keep things in logical chunks, maybe
would that chunk be better added with the patch that uses it?

> I also moved the the clock source list to a plist, which removes some lookup
> overhead in the average case.

Probably need to update the header, as you don't use a plist now. :)


> Signed-Off-By: Daniel Walker <dwalker@mvista.com>


Otherwise this patch looks like a good cleanup to me.

Acked-by: John Stultz <johnstul@us.ibm.com>

thanks
-john





^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-09 18:50   ` john stultz
@ 2006-10-09 19:24     ` Daniel Walker
  2006-10-09 19:49       ` Thomas Gleixner
  0 siblings, 1 reply; 31+ messages in thread
From: Daniel Walker @ 2006-10-09 19:24 UTC (permalink / raw)
  To: john stultz; +Cc: akpm, linux-kernel

On Mon, 2006-10-09 at 11:50 -0700, john stultz wrote:
> On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> > plain text document attachment (clocksource_init_call.patch)
> > Since it's likely that this interface would get used during bootup 
> > I moved all the clocksource registration into the postcore initcall. 
> 
> So this is still somewhat of an open question: While timekeeping_init
> runs quite early, and the timekeeping subsystem and its interface is
> usable early in the boot process, currently not all the clocksources are
> available as early. This is by design, as there may be clocksource
> driver modules loaded later on in the boot process, so we don't want to
> require it early.
> 
> So, the question becomes: Do we want to start using arch specific
> clocksources as early as possible, with the potential that we'll replace
> it when a better one shows up later? It would allow for finer grained
> timekeeping early in boot, which sounds nice, but I'm not sure how great
> the real need is for that.

My main motivation is that I'm assuming other uses of the interface will
exist. Then anything that uses the interface after postcore will avoid
switching clocks later.

If I for instance, just return clocksource_jiffies until the system is
fully booted then any thing that got a clock early, even during part of
device_initcall, would end up switching to a new clock when boot up
finished. I think that was acceptable when just timekeeping might have
been using the interface, but I don't know that it would scale well from
1 to 2 to 5 users of the interface. Then you would have several clock
switches happened after boot up .

> > This also eliminated some clocksource shuffling during bootup.
> 
> Actually, I'm not sure I see this. Which shuffling does it avoid? 

The shuffling that you commented about in kernel/time/clocksource.c
which was the reason for the "finished_booting" flag, and related code.

> I suspect it might actually cause more shuffling, as some clocksources
> (well, just the TSC, really.. its such a pain...) are not disqualified
> until later because we don't know if the system will enter C3, or change
> cpufreq, etc..  By waiting longer, we increase the chance that those
> disqualifying actions will occur before we install it.

This is something I've struggled with, but it's still and issue with the
current code to a lesser degree. Like cpufreq isn't likely to be used
during bootup, but acpi sleep states might be..

The current code can be classified as avoiding shuffle, but not
eliminating it. Like with the acpi_pm timer, I was thinking I'd just put
it back into device_initcall and assume shuffling might happen in rare
cases.

Daniel


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 05/10] -mm: clocksource: convert generic timeofday
  2006-10-06 18:54 ` [PATCH 05/10] -mm: clocksource: convert generic timeofday Daniel Walker
  2006-10-07 23:04   ` Oleg Verych
@ 2006-10-09 19:39   ` john stultz
  2006-10-09 20:19     ` Daniel Walker
  1 sibling, 1 reply; 31+ messages in thread
From: john stultz @ 2006-10-09 19:39 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment (clocksource_more_generic.patch)
> Delete alot of remaining code in kernel/time/clocksource.c that
> is replaced with this patch. Removed the deprecated "clock" kernel
> parameter. 

Hmmm. This patch is a bit more confusing. From first glance it looks
like a lot of code churn for not a whole lot of benefit. And as Thomas
already mentioned, you should probably leave the "clock" bit alone for
now.

> Shifts some of the code around so that the time of day override 
> happens inside kernel/timer.c.

Maybe could you explain your rational for this a bit more. I personally
prefer the current breakdown, where the clocksource selection and
override bits are in the clocksource code.

Currently the layering is like this:
[timekeeping core]
[clocksource core]
[clocksource drivers]

Where timekeeping tries to have as little knowledge as possible of the
details of the clocksource drivers (outside of basic knowledge of how to
use them). To the timekeeping core, all clocksources are the same. It
leaves the selection algorithm and its management up to the clocksource
core.

Now if I understand your intent you seem to be splitting it up a bit:

[timekeeping core]                   [sched_clock code]
[timekeeping clocksource selection]  [sched_clock clocksource selection]
[                          clocksource core                            ]
[                        clocksource drivers                           ]


Where the selection code is moved from the clocksource core into the
timekeeping code. I can understand some of the rational, as timekeeping
and sched_clock have differing selection criteria, so why not have
separate logic and share the clocksource core.

So, here's where I'm coming from on this issue: I feel sched_clock to be
a unfortunately necessary hack. Ideally timekeeping reads should be fast
enough to do from the scheduler, but that just is not the case (just on
most arches, some don't have this problem). And since its just scheduler
decisions, it does not need the correctness that timekeeping needs, so
we have this arch specific hook that does whatever it needs. And since
its sorta simple and stupid, the code duplication is pretty minor (no
NTP adjustments, etc). 

In my mind this reduces the benefit gained from making a generic
sched_clock. Currently the clocksource rating logic for timekeeping is
pretty simple: "weigh correctness first, then consider performance". 

Now to have a generic sched_clock, we're going to have to introduce a
new rating scale, which would select a more vague "speed over
correctness, as long as its totally not insane" logic. To me, it seems a
bit complicated for generic logic.

Thus I prefer having the clocksource core keep the an understanding of
the "correctness first, then performance".

Now, I don't want to discourage your efforts here (BTW: I really
appreciate them, and I think attempting new users for the clocksource
infrastructure will make that infrastructure cleaner and better). It
seems perfectly logical to use the clocksource infrastructure in
sched_clock, but maybe, since sched_clock is the necessary hack that it
is, we can do a more minor cleanup, with less impact to the clocksource
infrastructure?

Maybe an idea just to start, would be to have the arch specific
sched_clock() code use __get_clock(char* name), with its internal
selection logic based on the clocksource names. This will then have
minor impact on the current timekeeping/clocksource core code, but still
allow for some reduction in code duplication. Then as hardware
clocksources are tested for viability (for example, HPET may be a good
bet here, but ACPI PM would not), we can add that logic to the arch
specific sched_clock code.

Sound reasonable?


Anyway, that was a bit long winded. I apologize. More specific comments
below:

> The biggest timeofday changes are in update_wall_time() and
> change_clocksource(). I removed the unconditional call to 
> change_clocksource(), and replaced it with a single atomic
> check. The atomic is asserted only when a clock change is
> needed. update_callback is no longer driven from 
> update_wall_time().
> 
> The fast path is now a single atomic check.
> 
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>
> 
> ---
>  include/linux/clocksource.h |    3 
>  kernel/time/clocksource.c   |  216 ++++++--------------------------------------
>  kernel/timer.c              |  167 +++++++++++++++++++++++++++++-----
>  3 files changed, 178 insertions(+), 208 deletions(-)
> 
> Index: linux-2.6.18/include/linux/clocksource.h
> ===================================================================
> --- linux-2.6.18.orig/include/linux/clocksource.h
> +++ linux-2.6.18/include/linux/clocksource.h
> @@ -205,7 +205,8 @@ static inline void clocksource_calculate
>  
> 
>  /* used to install a new clocksource */
> -extern struct clocksource *clocksource_get_next(void);
> +extern int clocksource_sysfs_register(struct sysdev_attribute*);
> +extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
>  extern int clocksource_register(struct clocksource*);
>  extern void clocksource_rating_change(struct clocksource*);
>  extern struct clocksource * clocksource_get_clock(char*);
> Index: linux-2.6.18/kernel/time/clocksource.c
> ===================================================================
> --- linux-2.6.18.orig/kernel/time/clocksource.c
> +++ linux-2.6.18/kernel/time/clocksource.c
> @@ -5,6 +5,8 @@
>   *
>   * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
>   *
> + * Copyright (C) 2006 MontaVista Daniel Walker (dwalker@mvista.com)
> + *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
>   * the Free Software Foundation; either version 2 of the License, or
> @@ -21,7 +23,6 @@
>   *
>   * TODO WishList:
>   *   o Allow clocksource drivers to be unregistered
> - *   o get rid of clocksource_jiffies extern
>   */
>  
>  #include <linux/clocksource.h>
> @@ -29,45 +30,15 @@
>  #include <linux/init.h>
>  #include <linux/module.h>
>  
> -/* XXX - Would like a better way for initializing curr_clocksource */
> -extern struct clocksource clocksource_jiffies;
> -
>  /*[Clocksource internal variables]---------
> - * curr_clocksource:
> - *	currently selected clocksource. Initialized to clocksource_jiffies.
> - * next_clocksource:
> - *	pending next selected clocksource.
>   * clocksource_list:
>   *	priority list with the registered clocksources
>   * clocksource_lock:
> - *	protects manipulations to curr_clocksource and next_clocksource
> - *	and the clocksource_list
> - * override_name:
> - *	Name of the user-specified clocksource.
> + * 	protects manipulations to the clocksource_list
>   */
> -static struct clocksource *curr_clocksource = &clocksource_jiffies;
> -static struct clocksource *next_clocksource;
> -static struct list_head clocksource_list = LIST_HEAD_INIT(clocksource_list);
> +static __read_mostly
> +struct list_head clocksource_list = LIST_HEAD_INIT(clocksource_list);
>  static DEFINE_SPINLOCK(clocksource_lock);
> -static char override_name[32];
> -
> -/**
> - * clocksource_get_next - Returns the selected clocksource
> - *
> - */
> -struct clocksource *clocksource_get_next(void)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&clocksource_lock, flags);
> -	if (next_clocksource) {
> -		curr_clocksource = next_clocksource;
> -		next_clocksource = NULL;
> -	}
> -	spin_unlock_irqrestore(&clocksource_lock, flags);
> -
> -	return curr_clocksource;
> -}
>  
>  /**
>   * __is_registered - Returns a clocksource if it's registered
> @@ -95,28 +66,6 @@ static struct clocksource * __is_registe
>  }
>  
>  /**
> - * __get_clock - Finds a specific clocksource
> - * @name:		name of the clocksource to return
> - *
> - * Private function. Must hold clocksource_lock when called.
> - *
> - * Returns the clocksource if registered, zero otherwise.
> - * If the @name is null the highest rated clock is returned.
> - */
> -static inline struct clocksource * __get_clock(char * name)
> -{
> -
> -	if (unlikely(list_empty(&clocksource_list)))
> -		return &clocksource_jiffies;
> -
> -	if (!name)
> -		return list_entry(clocksource_list.next,
> -				  struct clocksource, list);
> -
> -	return __is_registered(name);
> -}
> -

Errr.. Wasn't this function just added in the last patch? Can we reduce
the churn here a bit?


> -/**
>   * clocksource_get_clock - Finds a specific clocksource
>   * @name:		name of the clocksource to return
>   *
> @@ -128,29 +77,17 @@ struct clocksource * clocksource_get_clo
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&clocksource_lock, flags);
> -	ret = __get_clock(name);
> +	if (unlikely(list_empty(&clocksource_list)))
> +		ret = &clocksource_jiffies;
> +	else if (!name)
> +		ret = list_entry(clocksource_list.next,
> +				 struct clocksource, list);
> +	else
> +		ret = __is_registered(name);
>  	spin_unlock_irqrestore(&clocksource_lock, flags);
>  	return ret;
>  }
>  

[snip]

> @@ -952,10 +1061,26 @@ static void update_wall_time(void)
>  	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
>  
>  	/* check to see if there is a new clocksource to use */
> -	if (change_clocksource()) {
> +	if (unlikely(atomic_read(&clock_check))) {
> +
> +		/*
> +		 * Switch to the new override clock, or the highest
> +		 * rated clock.
> +		 */
> +		if (*clock_override_name)
> +			change_clocksource(clock_override_name);
> +		else
> +			change_clocksource(NULL);
> +
>  		clock->error = 0;
>  		clock->xtime_nsec = 0;
>  		clocksource_calculate_interval(clock, tick_nsec);
> +
> +		/*
> +		 * Remove the change signal
> +		 */
> +		atomic_dec(&clock_check);
> +
>  	}
>  }
>  
I think this last chunk (changing the clocksource switching logic) has
some potential. Mind breaking it out into a separate patch?

thanks
-john



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 06/10] -mm: clocksource: add block notifier
  2006-10-06 18:54 ` [PATCH 06/10] -mm: clocksource: add block notifier Daniel Walker
@ 2006-10-09 19:45   ` john stultz
  0 siblings, 0 replies; 31+ messages in thread
From: john stultz @ 2006-10-09 19:45 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment
> (clocksource_add_block_notify_on_new_clock.patch)
> Adds a call back interface for register/rating change
> events.
> 
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>

This one looks interesting, but I'm not sure if we yet have the use-case
really needed to implement notifier infrastructure (taking the comments
from my last discussion). I don't really have any criticism with the
code, I just worry it might be over-doing it.

Maybe it would be more persuasive if it went together with the first
users of it, rather then as a independent infrastructure buildup patch.

thanks
-john






^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 01/10] -mm: clocksource: increase initcall priority
  2006-10-09 19:24     ` Daniel Walker
@ 2006-10-09 19:49       ` Thomas Gleixner
  0 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2006-10-09 19:49 UTC (permalink / raw)
  To: Daniel Walker; +Cc: john stultz, akpm, linux-kernel

On Mon, 2006-10-09 at 12:24 -0700, Daniel Walker wrote:
> > So, the question becomes: Do we want to start using arch specific
> > clocksources as early as possible, with the potential that we'll replace
> > it when a better one shows up later? It would allow for finer grained
> > timekeeping early in boot, which sounds nice, but I'm not sure how great
> > the real need is for that.
> 
> My main motivation is that I'm assuming other uses of the interface will
> exist. Then anything that uses the interface after postcore will avoid
> switching clocks later.

What does it matter, when clocks switch? That's a one time event.

The boot code is different and there is nothing which requires the
availability of that interface in the early boot process.

> If I for instance, just return clocksource_jiffies until the system is
> fully booted then any thing that got a clock early, even during part of
> device_initcall, would end up switching to a new clock when boot up
> finished. I think that was acceptable when just timekeeping might have
> been using the interface, but I don't know that it would scale well from
> 1 to 2 to 5 users of the interface. Then you would have several clock
> switches happened after boot up .

What are the 5 users of that interface ? 

There is one existing user, one artifical you try to create for
sched_clock and some handwaving about instrumentation. Even when we have
5 users, then the switching of clocks does not matter anything. This
happens the same way, when I load the highest rated clock source as a
module during init.

> > I suspect it might actually cause more shuffling, as some clocksources
> > (well, just the TSC, really.. its such a pain...) are not disqualified
> > until later because we don't know if the system will enter C3, or change
> > cpufreq, etc..  By waiting longer, we increase the chance that those
> > disqualifying actions will occur before we install it.
> 
> This is something I've struggled with, but it's still and issue with the
> current code to a lesser degree. Like cpufreq isn't likely to be used
> during bootup, but acpi sleep states might be..

Lesser degree ? You have an issue, which was not there before you made
the changes. This is simply regression and the ignoring of that
regression is wilful breakage.

> The current code can be classified as avoiding shuffle, but not
> eliminating it. Like with the acpi_pm timer, I was thinking I'd just put
> it back into device_initcall and assume shuffling might happen in rare
> cases.

Great. We install TSC in early boot and pmtimer in late boot. Now we
have the fun of unsynced TSCs and time going backwards again, until we
get pmtimer loaded. I have explained it to you already, but that is the
reason why that stuff was moved into late boot.

Also you have not yet once given an explanation other than the "magic
churn", why this is necessary. There is no churn. The clocksource layer
is designed to allow the replacement of clock sources and it does not
hurt at all. It does also not matter if there are 1 ore 100 users.

	tglx



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 07/10] -mm: clocksource: remove update_callback
  2006-10-06 18:54 ` [PATCH 07/10] -mm: clocksource: remove update_callback Daniel Walker
@ 2006-10-09 19:56   ` john stultz
  0 siblings, 0 replies; 31+ messages in thread
From: john stultz @ 2006-10-09 19:56 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel, Thomas Gleixner

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment
> (clocksource_remove_update_callback.patch)
> Inorporated John's comment of moving the rating change code
> into mark_tsc_unstable() , then I renamed tsc_update_callback()
> to tsc_update_khz() and added a call to it into places that change
> the tsc_khz variable.
> 
> With the new notifier block the update_callback becomes
> obsolete.
> 
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>

This (ie: dropping the update_callback logic) basically looks fine.
Although it builds ontop of some of the patches I'm not excited about,
so I'd probably want to see it independent of those changes.


On a related note: I take exception (albeit lightheartedly - I think he
was painting with a broad brush) to Thomas' comment earlier about
"fragile timekeeping". I feel the timekeeping code is really *leaps and
bounds* better with regard to correctness and robustness. There have
been a few embarrassing flubs, but really I do think we've made
improvements.

Now, with that said, I will admit that the TSC code is somewhat fragile
(and I believe this is what Thomas was getting at). Its been the real
problem causer in most cases, so we need to be careful about changes to
its logic that will affect clocksource selection.

So this patch in-particular, however reworked, would need some careful
testing.

thanks
-john

> ---
>  arch/i386/kernel/tsc.c      |   54 ++++++++++++++++++++++----------------------
>  include/linux/clocksource.h |    2 -
>  2 files changed, 28 insertions(+), 28 deletions(-)
> 
> Index: linux-2.6.18/arch/i386/kernel/tsc.c
> ===================================================================
> --- linux-2.6.18.orig/arch/i386/kernel/tsc.c
> +++ linux-2.6.18/arch/i386/kernel/tsc.c
> @@ -50,8 +50,7 @@ static int __init tsc_setup(char *str)
>  __setup("notsc", tsc_setup);
>  
>  /*
> - * code to mark and check if the TSC is unstable
> - * due to cpufreq or due to unsynced TSCs
> + * Flag that denotes an unstable tsc and check function.
>   */
>  static int tsc_unstable;
>  
> @@ -60,12 +59,6 @@ static inline int check_tsc_unstable(voi
>  	return tsc_unstable;
>  }
>  
> -void mark_tsc_unstable(void)
> -{
> -	tsc_unstable = 1;
> -}
> -EXPORT_SYMBOL_GPL(mark_tsc_unstable);
> -
>  /* Accellerators for sched_clock()
>   * convert from cycles(64bits) => nanoseconds (64bits)
>   *  basic equation:
> @@ -171,6 +164,21 @@ err:
>  	return 0;
>  }
>  
> +#if !defined(CONFIG_SMP) || defined(CONFIG_CPU_FREQ)
> +static struct clocksource clocksource_tsc;
> +static unsigned long current_tsc_khz;
> +static void tsc_update_khz(void)
> +{
> +	/* only update if tsc_khz has changed: */
> +	if (current_tsc_khz != tsc_khz) {
> +		current_tsc_khz = tsc_khz;
> +		clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
> +							clocksource_tsc.shift);
> +	}
> +
> +}
> +#endif
> +
>  int recalibrate_cpu_khz(void)
>  {
>  #ifndef CONFIG_SMP
> @@ -179,6 +187,7 @@ int recalibrate_cpu_khz(void)
>  	if (cpu_has_tsc) {
>  		cpu_khz = calculate_cpu_khz();
>  		tsc_khz = cpu_khz;
> +		tsc_update_khz();
>  		cpu_data[0].loops_per_jiffy =
>  			cpufreq_scale(cpu_data[0].loops_per_jiffy,
>  					cpu_khz_old, cpu_khz);
> @@ -282,6 +291,7 @@ time_cpufreq_notifier(struct notifier_bl
>  						ref_freq, freq->new);
>  			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
>  				tsc_khz = cpu_khz;
> +				tsc_update_khz();
>  				set_cyc2ns_scale(cpu_khz);
>  				/*
>  				 * TSC based sched_clock turns
> @@ -322,7 +332,6 @@ core_initcall(cpufreq_tsc);
>  /* clock source code */
>  
>  static unsigned long current_tsc_khz = 0;
> -static int tsc_update_callback(void);
>  
>  static cycle_t read_tsc(void)
>  {
> @@ -340,31 +349,24 @@ static struct clocksource clocksource_ts
>  	.mask			= CLOCKSOURCE_MASK(64),
>  	.mult			= 0, /* to be set */
>  	.shift			= 22,
> -	.update_callback	= tsc_update_callback,
>  	.is_continuous		= 1,
>  };
>  
> -static int tsc_update_callback(void)
> -{
> -	int change = 0;
>  
> -	/* check to see if we should switch to the safe clocksource: */
> -	if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
> +/*
> + * code to mark if the TSC is unstable
> + * due to cpufreq or due to unsynced TSCs
> + */
> +void mark_tsc_unstable(void)
> +{
> +	if (unlikely(!tsc_unstable)) {
>  		clocksource_tsc.rating = 50;
>  		clocksource_rating_change(&clocksource_tsc);
> -		change = 1;
>  	}
> -
> -	/* only update if tsc_khz has changed: */
> -	if (current_tsc_khz != tsc_khz) {
> -		current_tsc_khz = tsc_khz;
> -		clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
> -							clocksource_tsc.shift);
> -		change = 1;
> -	}
> -
> -	return change;
> +	tsc_unstable = 1;
>  }
> +EXPORT_SYMBOL_GPL(mark_tsc_unstable);
> +
>  
>  static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
>  {
> Index: linux-2.6.18/include/linux/clocksource.h
> ===================================================================
> --- linux-2.6.18.orig/include/linux/clocksource.h
> +++ linux-2.6.18/include/linux/clocksource.h
> @@ -59,7 +59,6 @@ extern struct clocksource clocksource_ji
>   *			subtraction of non 64 bit counters
>   * @mult:		cycle to nanosecond multiplier
>   * @shift:		cycle to nanosecond divisor (power of two)
> - * @update_callback:	called when safe to alter clocksource values
>   * @is_continuous:	defines if clocksource is free-running.
>   * @cycle_interval:	Used internally by timekeeping core, please ignore.
>   * @xtime_interval:	Used internally by timekeeping core, please ignore.
> @@ -72,7 +71,6 @@ struct clocksource {
>  	cycle_t mask;
>  	u32 mult;
>  	u32 shift;
> -	int (*update_callback)(void);
>  	int is_continuous;
>  
>  	/* timekeeping specific data, ignore */
> 
> --
> 


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 08/10] -mm: clocksource: initialize list value
  2006-10-06 18:54 ` [PATCH 08/10] -mm: clocksource: initialize list value Daniel Walker
@ 2006-10-09 19:59   ` john stultz
  0 siblings, 0 replies; 31+ messages in thread
From: john stultz @ 2006-10-09 19:59 UTC (permalink / raw)
  To: Daniel Walker; +Cc: akpm, linux-kernel

On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> plain text document attachment (clocksouce_list_init.patch)
> A change to clocksource initialization. It's optional for new clocksources,
> but prefered. If the list field is initialized it allows clocksource_register 
> to complete faster since it doesn't have the scan the list of clocks doing 
> strcmp on each.
> 
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>

I dont' have any comments that haven't been said:

Drop the CLOCKSOURCE_LIST_INIT define, and make it required. Its easy
enough to add.

thanks
-john




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 05/10] -mm: clocksource: convert generic timeofday
  2006-10-09 19:39   ` john stultz
@ 2006-10-09 20:19     ` Daniel Walker
  0 siblings, 0 replies; 31+ messages in thread
From: Daniel Walker @ 2006-10-09 20:19 UTC (permalink / raw)
  To: john stultz; +Cc: akpm, linux-kernel

On Mon, 2006-10-09 at 12:39 -0700, john stultz wrote:
> On Fri, 2006-10-06 at 11:54 -0700, Daniel Walker wrote:
> > plain text document attachment (clocksource_more_generic.patch)
> > Delete alot of remaining code in kernel/time/clocksource.c that
> > is replaced with this patch. Removed the deprecated "clock" kernel
> > parameter. 
> 
> Hmmm. This patch is a bit more confusing. From first glance it looks
> like a lot of code churn for not a whole lot of benefit. And as Thomas
> already mentioned, you should probably leave the "clock" bit alone for
> now.

The benefits is more in the separation of the two. I'll explain the
rational below.

> > Shifts some of the code around so that the time of day override 
> > happens inside kernel/timer.c.
> 
> Maybe could you explain your rational for this a bit more. I personally
> prefer the current breakdown, where the clocksource selection and
> override bits are in the clocksource code.
> 
> Currently the layering is like this:
> [timekeeping core]
> [clocksource core]
> [clocksource drivers]



> Where timekeeping tries to have as little knowledge as possible of the
> details of the clocksource drivers (outside of basic knowledge of how to
> use them). To the timekeeping core, all clocksources are the same. It
> leaves the selection algorithm and its management up to the clocksource
> core.
> 
> Now if I understand your intent you seem to be splitting it up a bit:
> 
> [timekeeping core]                   [sched_clock code]
> [timekeeping clocksource selection]  [sched_clock clocksource selection]
> [                          clocksource core                            ]
> [                        clocksource drivers                           ]
> 

This is mostly how I see the layers, except after sched_clock I see
"..." 

> Where the selection code is moved from the clocksource core into the
> timekeeping code. I can understand some of the rational, as timekeeping
> and sched_clock have differing selection criteria, so why not have
> separate logic and share the clocksource core.
> 
> So, here's where I'm coming from on this issue: I feel sched_clock to be
> a unfortunately necessary hack. Ideally timekeeping reads should be fast
> enough to do from the scheduler, but that just is not the case (just on
> most arches, some don't have this problem). And since its just scheduler
> decisions, it does not need the correctness that timekeeping needs, so
> we have this arch specific hook that does whatever it needs. And since
> its sorta simple and stupid, the code duplication is pretty minor (no
> NTP adjustments, etc). 
> 
> In my mind this reduces the benefit gained from making a generic
> sched_clock. Currently the clocksource rating logic for timekeeping is
> pretty simple: "weigh correctness first, then consider performance". 
> 
> Now to have a generic sched_clock, we're going to have to introduce a
> new rating scale, which would select a more vague "speed over
> correctness, as long as its totally not insane" logic. To me, it seems a
> bit complicated for generic logic.
> 
> Thus I prefer having the clocksource core keep the an understanding of
> the "correctness first, then performance".
> 
> Now, I don't want to discourage your efforts here (BTW: I really
> appreciate them, and I think attempting new users for the clocksource
> infrastructure will make that infrastructure cleaner and better). It
> seems perfectly logical to use the clocksource infrastructure in
> sched_clock, but maybe, since sched_clock is the necessary hack that it
> is, we can do a more minor cleanup, with less impact to the clocksource
> infrastructure?

First, I've not done this clean up specifically for sched_clock. The
sched_clock changes are just there as an example of the interface
usage. 

> Maybe an idea just to start, would be to have the arch specific
> sched_clock() code use __get_clock(char* name), with its internal
> selection logic based on the clocksource names. This will then have
> minor impact on the current timekeeping/clocksource core code, but still
> allow for some reduction in code duplication. Then as hardware
> clocksources are tested for viability (for example, HPET may be a good
> bet here, but ACPI PM would not), we can add that logic to the arch
> specific sched_clock code.
> 
> Sound reasonable?

Yes. We could do this.. It keeps the performance level mostly flat while
removing all the cycles to nanosecond shift logic in every
architecture. 

> 
> Anyway, that was a bit long winded. I apologize. More specific comments
> below:
> 
ok.

> >  
> >  /**
> > - * __get_clock - Finds a specific clocksource
> > - * @name:		name of the clocksource to return
> > - *
> > - * Private function. Must hold clocksource_lock when called.
> > - *
> > - * Returns the clocksource if registered, zero otherwise.
> > - * If the @name is null the highest rated clock is returned.
> > - */
> > -static inline struct clocksource * __get_clock(char * name)
> > -{
> > -
> > -	if (unlikely(list_empty(&clocksource_list)))
> > -		return &clocksource_jiffies;
> > -
> > -	if (!name)
> > -		return list_entry(clocksource_list.next,
> > -				  struct clocksource, list);
> > -
> > -	return __is_registered(name);
> > -}
> > -
> 
> Errr.. Wasn't this function just added in the last patch? Can we reduce
> the churn here a bit?

I tried, but I also have to make each patch compile. So it gets a bit
tricky.

> 
> > @@ -952,10 +1061,26 @@ static void update_wall_time(void)
> >  	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
> >  
> >  	/* check to see if there is a new clocksource to use */
> > -	if (change_clocksource()) {
> > +	if (unlikely(atomic_read(&clock_check))) {
> > +
> > +		/*
> > +		 * Switch to the new override clock, or the highest
> > +		 * rated clock.
> > +		 */
> > +		if (*clock_override_name)
> > +			change_clocksource(clock_override_name);
> > +		else
> > +			change_clocksource(NULL);
> > +
> >  		clock->error = 0;
> >  		clock->xtime_nsec = 0;
> >  		clocksource_calculate_interval(clock, tick_nsec);
> > +
> > +		/*
> > +		 * Remove the change signal
> > +		 */
> > +		atomic_dec(&clock_check);
> > +
> >  	}
> >  }
> >  
> I think this last chunk (changing the clocksource switching logic) has
> some potential. Mind breaking it out into a separate patch?

Maybe, but part of this fell out of reorganizing the code. 

Daniel


^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2006-10-09 20:19 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-06 18:54 [PATCH 00/10] -mm: generic clocksource API -v2 Daniel Walker
2006-10-06 18:54 ` [PATCH 01/10] -mm: clocksource: increase initcall priority Daniel Walker
2006-10-07 15:40   ` OGAWA Hirofumi
2006-10-07 16:51     ` Daniel Walker
2006-10-07 18:00       ` OGAWA Hirofumi
2006-10-07 18:41         ` OGAWA Hirofumi
2006-10-07 18:44           ` Daniel Walker
2006-10-07 19:19             ` OGAWA Hirofumi
2006-10-09 18:50   ` john stultz
2006-10-09 19:24     ` Daniel Walker
2006-10-09 19:49       ` Thomas Gleixner
2006-10-06 18:54 ` [PATCH 02/10] -mm: clocksource: small cleanup Daniel Walker
2006-10-09 18:51   ` john stultz
2006-10-06 18:54 ` [PATCH 03/10] -mm: clocksource: move old API calls Daniel Walker
2006-10-06 18:54 ` [PATCH 04/10] -mm: clocksource: add some new " Daniel Walker
2006-10-09 19:01   ` john stultz
2006-10-06 18:54 ` [PATCH 05/10] -mm: clocksource: convert generic timeofday Daniel Walker
2006-10-07 23:04   ` Oleg Verych
2006-10-09 19:39   ` john stultz
2006-10-09 20:19     ` Daniel Walker
2006-10-06 18:54 ` [PATCH 06/10] -mm: clocksource: add block notifier Daniel Walker
2006-10-09 19:45   ` john stultz
2006-10-06 18:54 ` [PATCH 07/10] -mm: clocksource: remove update_callback Daniel Walker
2006-10-09 19:56   ` john stultz
2006-10-06 18:54 ` [PATCH 08/10] -mm: clocksource: initialize list value Daniel Walker
2006-10-09 19:59   ` john stultz
2006-10-06 18:54 ` [PATCH 09/10] -mm: clocksource: add generic sched_clock() Daniel Walker
2006-10-06 18:54 ` [PATCH 10/10] -mm: clocksource: update kernel parameters Daniel Walker
2006-10-07  1:53 ` [PATCH 00/10] -mm: generic clocksource API -v2 Andrew Morton
2006-10-07  3:10   ` Daniel Walker
  -- strict thread matches above, loose matches on Subject: below --
2006-08-04  3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
2006-08-04  3:24 ` [PATCH 05/10] -mm clocksource: convert generic timeofday dwalker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox