* [PATCH 01/10] -mm clocksource: increase initcall priority
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 18:39 ` john stultz
2006-08-04 3:24 ` [PATCH 02/10] -mm clocksource: small cleanup dwalker
` (8 subsequent siblings)
9 siblings, 1 reply; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_init_call.patch --]
[-- Type: text/plain, Size: 4626 bytes --]
Since it's likely that this interface would get used during bootup
I moved all the clocksource registration into the postcore initcall.
This also eliminated some clocksource shuffling during bootup.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
arch/i386/kernel/hpet.c | 2 +-
arch/i386/kernel/i8253.c | 2 +-
arch/i386/kernel/tsc.c | 2 +-
drivers/clocksource/acpi_pm.c | 2 +-
drivers/clocksource/cyclone.c | 2 +-
drivers/clocksource/scx200_hrt.c | 2 +-
kernel/time/clocksource.c | 15 +--------------
kernel/time/jiffies.c | 2 +-
8 files changed, 8 insertions(+), 21 deletions(-)
Index: linux-2.6.17/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.17/arch/i386/kernel/hpet.c
@@ -64,4 +64,4 @@ static int __init init_hpet_clocksource(
return clocksource_register(&clocksource_hpet);
}
-module_init(init_hpet_clocksource);
+postcore_initcall(init_hpet_clocksource);
Index: linux-2.6.17/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/i8253.c
+++ linux-2.6.17/arch/i386/kernel/i8253.c
@@ -115,4 +115,4 @@ static int __init init_pit_clocksource(v
clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
return clocksource_register(&clocksource_pit);
}
-module_init(init_pit_clocksource);
+postcore_initcall(init_pit_clocksource);
Index: linux-2.6.17/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.17/arch/i386/kernel/tsc.c
@@ -475,4 +475,4 @@ static int __init init_tsc_clocksource(v
return 0;
}
-module_init(init_tsc_clocksource);
+postcore_initcall(init_tsc_clocksource);
Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
+++ linux-2.6.17/drivers/clocksource/acpi_pm.c
@@ -174,4 +174,4 @@ pm_good:
return clocksource_register(&clocksource_acpi_pm);
}
-module_init(init_acpi_pm_clocksource);
+postcore_initcall(init_acpi_pm_clocksource);
Index: linux-2.6.17/drivers/clocksource/cyclone.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/cyclone.c
+++ linux-2.6.17/drivers/clocksource/cyclone.c
@@ -116,4 +116,4 @@ static int __init init_cyclone_clocksour
return clocksource_register(&clocksource_cyclone);
}
-module_init(init_cyclone_clocksource);
+postcore_initcall(init_cyclone_clocksource);
Index: linux-2.6.17/drivers/clocksource/scx200_hrt.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/scx200_hrt.c
+++ linux-2.6.17/drivers/clocksource/scx200_hrt.c
@@ -94,7 +94,7 @@ static int __init init_hrt_clocksource(v
return clocksource_register(&cs_hrt);
}
-module_init(init_hrt_clocksource);
+postcore_initcall(init_hrt_clocksource);
MODULE_AUTHOR("Jim Cromie <jim.cromie@gmail.com>");
MODULE_DESCRIPTION("clocksource on SCx200 HiRes Timer");
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -50,19 +50,6 @@ static struct clocksource *next_clocksou
static LIST_HEAD(clocksource_list);
static DEFINE_SPINLOCK(clocksource_lock);
static char override_name[32];
-static int finished_booting;
-
-/* clocksource_done_booting - Called near the end of bootup
- *
- * Hack to avoid lots of clocksource churn at boot time
- */
-static int __init clocksource_done_booting(void)
-{
- finished_booting = 1;
- return 0;
-}
-
-late_initcall(clocksource_done_booting);
/**
* clocksource_get_next - Returns the selected clocksource
@@ -73,7 +60,7 @@ struct clocksource *clocksource_get_next
unsigned long flags;
spin_lock_irqsave(&clocksource_lock, flags);
- if (next_clocksource && finished_booting) {
+ if (next_clocksource) {
curr_clocksource = next_clocksource;
next_clocksource = NULL;
}
Index: linux-2.6.17/kernel/time/jiffies.c
===================================================================
--- linux-2.6.17.orig/kernel/time/jiffies.c
+++ linux-2.6.17/kernel/time/jiffies.c
@@ -70,4 +70,4 @@ static int __init init_jiffies_clocksour
return clocksource_register(&clocksource_jiffies);
}
-module_init(init_jiffies_clocksource);
+postcore_initcall(init_jiffies_clocksource);
--
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH 01/10] -mm clocksource: increase initcall priority
2006-08-04 3:24 ` [PATCH 01/10] -mm clocksource: increase initcall priority dwalker
@ 2006-08-04 18:39 ` john stultz
0 siblings, 0 replies; 21+ messages in thread
From: john stultz @ 2006-08-04 18:39 UTC (permalink / raw)
To: dwalker; +Cc: akpm, linux-kernel
First of all, sorry for not reviewing these patches sooner. It's been
sitting on my todo list for way too long. Thanks for continuing to
push it despite my slowness.
On Thu, 2006-08-03 at 20:24 -0700, dwalker@mvista.com wrote:
> plain text document attachment (clocksource_init_call.patch)
> Since it's likely that this interface would get used during bootup
> I moved all the clocksource registration into the postcore initcall.
> This also eliminated some clocksource shuffling during bootup.
This one looks interesting. As long as it works as advertised I'm fine
with it, although I worry it might bring up some initialization ordering
issues. It will need some careful testing in -mm.
thanks
-john
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 02/10] -mm clocksource: small cleanup
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
2006-08-04 3:24 ` [PATCH 01/10] -mm clocksource: increase initcall priority dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 18:40 ` john stultz
2006-08-04 3:24 ` [PATCH 03/10] -mm clocksource: enable plist dwalker
` (7 subsequent siblings)
9 siblings, 1 reply; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_cleanup.patch --]
[-- Type: text/plain, Size: 2776 bytes --]
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 6 +++---
kernel/timer.c | 7 ++++---
3 files changed, 8 insertions(+), 7 deletions(-)
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -159,7 +159,7 @@ static inline s64 cyc2ns(struct clocksou
* Unless you're the timekeeping code, you should not be using this!
*/
static inline void clocksource_calculate_interval(struct clocksource *c,
- unsigned long length_nsec)
+ unsigned long length_nsec)
{
u64 tmp;
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -143,7 +143,7 @@ int clocksource_register(struct clocksou
/* check if clocksource is already registered */
if (is_registered_source(c)) {
printk("register_clocksource: Cannot register %s. "
- "Already registered!", c->name);
+ "Already registered!", c->name);
ret = -EBUSY;
} else {
/* register it */
@@ -262,10 +262,10 @@ sysfs_show_available_clocksources(struct
* Sysfs setup bits:
*/
static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
- sysfs_override_clocksource);
+ sysfs_override_clocksource);
static SYSDEV_ATTR(available_clocksource, 0600,
- sysfs_show_available_clocksources, NULL);
+ sysfs_show_available_clocksources, NULL);
static struct sysdev_class clocksource_sysclass = {
set_kset_name("clocksource"),
Index: linux-2.6.17/kernel/timer.c
===================================================================
--- linux-2.6.17.orig/kernel/timer.c
+++ linux-2.6.17/kernel/timer.c
@@ -924,7 +924,7 @@ static int change_clocksource(void)
clock = new;
clock->cycle_last = now;
printk(KERN_INFO "Time: %s clocksource has been installed.\n",
- clock->name);
+ clock->name);
return 1;
} else if (clock->update_callback) {
return clock->update_callback();
@@ -932,7 +932,7 @@ static int change_clocksource(void)
return 0;
}
#else
-#define change_clocksource() (0)
+#define change_clocksource() do { 0; } while(0)
#endif
/**
@@ -1149,7 +1149,8 @@ static void update_wall_time(void)
/* accumulate error between NTP and clock interval */
clock->error += current_tick_length();
- clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
+ clock->error -= clock->xtime_interval <<
+ (TICK_LENGTH_SHIFT - clock->shift);
}
/* correct the clock when NTP error is too big */
--
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH 02/10] -mm clocksource: small cleanup
2006-08-04 3:24 ` [PATCH 02/10] -mm clocksource: small cleanup dwalker
@ 2006-08-04 18:40 ` john stultz
0 siblings, 0 replies; 21+ messages in thread
From: john stultz @ 2006-08-04 18:40 UTC (permalink / raw)
To: dwalker; +Cc: akpm, linux-kernel
On Thu, 2006-08-03 at 20:24 -0700, dwalker@mvista.com wrote:
> plain text document attachment (clocksource_cleanup.patch)
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>
Acked-by: John Stultz <johnstul@us.ibm.com>
thanks
-john
> ---
> include/linux/clocksource.h | 2 +-
> kernel/time/clocksource.c | 6 +++---
> kernel/timer.c | 7 ++++---
> 3 files changed, 8 insertions(+), 7 deletions(-)
>
> Index: linux-2.6.17/include/linux/clocksource.h
> ===================================================================
> --- linux-2.6.17.orig/include/linux/clocksource.h
> +++ linux-2.6.17/include/linux/clocksource.h
> @@ -159,7 +159,7 @@ static inline s64 cyc2ns(struct clocksou
> * Unless you're the timekeeping code, you should not be using this!
> */
> static inline void clocksource_calculate_interval(struct clocksource *c,
> - unsigned long length_nsec)
> + unsigned long length_nsec)
> {
> u64 tmp;
>
> Index: linux-2.6.17/kernel/time/clocksource.c
> ===================================================================
> --- linux-2.6.17.orig/kernel/time/clocksource.c
> +++ linux-2.6.17/kernel/time/clocksource.c
> @@ -143,7 +143,7 @@ int clocksource_register(struct clocksou
> /* check if clocksource is already registered */
> if (is_registered_source(c)) {
> printk("register_clocksource: Cannot register %s. "
> - "Already registered!", c->name);
> + "Already registered!", c->name);
> ret = -EBUSY;
> } else {
> /* register it */
> @@ -262,10 +262,10 @@ sysfs_show_available_clocksources(struct
> * Sysfs setup bits:
> */
> static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
> - sysfs_override_clocksource);
> + sysfs_override_clocksource);
>
> static SYSDEV_ATTR(available_clocksource, 0600,
> - sysfs_show_available_clocksources, NULL);
> + sysfs_show_available_clocksources, NULL);
>
> static struct sysdev_class clocksource_sysclass = {
> set_kset_name("clocksource"),
> Index: linux-2.6.17/kernel/timer.c
> ===================================================================
> --- linux-2.6.17.orig/kernel/timer.c
> +++ linux-2.6.17/kernel/timer.c
> @@ -924,7 +924,7 @@ static int change_clocksource(void)
> clock = new;
> clock->cycle_last = now;
> printk(KERN_INFO "Time: %s clocksource has been installed.\n",
> - clock->name);
> + clock->name);
> return 1;
> } else if (clock->update_callback) {
> return clock->update_callback();
> @@ -932,7 +932,7 @@ static int change_clocksource(void)
> return 0;
> }
> #else
> -#define change_clocksource() (0)
> +#define change_clocksource() do { 0; } while(0)
> #endif
>
> /**
> @@ -1149,7 +1149,8 @@ static void update_wall_time(void)
>
> /* accumulate error between NTP and clock interval */
> clock->error += current_tick_length();
> - clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
> + clock->error -= clock->xtime_interval <<
> + (TICK_LENGTH_SHIFT - clock->shift);
> }
>
> /* correct the clock when NTP error is too big */
>
> --
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 03/10] -mm clocksource: enable plist
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
2006-08-04 3:24 ` [PATCH 01/10] -mm clocksource: increase initcall priority dwalker
2006-08-04 3:24 ` [PATCH 02/10] -mm clocksource: small cleanup dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 3:24 ` [PATCH 04/10] -mm clocksource: add some new API calls dwalker
` (6 subsequent siblings)
9 siblings, 0 replies; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
[-- Attachment #1: clocksource_enable_plist.patch --]
[-- Type: text/plain, Size: 1665 bytes --]
I have a feeling this might get a little more discussion than the
other stuff, so it's in its own patch.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
init/Kconfig | 1 -
lib/Kconfig | 6 ------
lib/Makefile | 3 +--
3 files changed, 1 insertion(+), 9 deletions(-)
Index: linux-2.6.17/init/Kconfig
===================================================================
--- linux-2.6.17.orig/init/Kconfig
+++ linux-2.6.17/init/Kconfig
@@ -407,7 +407,6 @@ config BASE_FULL
config RT_MUTEXES
boolean
- select PLIST
config FUTEX
bool "Enable futex support" if EMBEDDED
Index: linux-2.6.17/lib/Kconfig
===================================================================
--- linux-2.6.17.orig/lib/Kconfig
+++ linux-2.6.17/lib/Kconfig
@@ -86,10 +86,4 @@ config TEXTSEARCH_BM
config TEXTSEARCH_FSM
tristate
-#
-# plist support is select#ed if needed
-#
-config PLIST
- boolean
-
endmenu
Index: linux-2.6.17/lib/Makefile
===================================================================
--- linux-2.6.17.orig/lib/Makefile
+++ linux-2.6.17/lib/Makefile
@@ -5,7 +5,7 @@
lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
- sha1.o
+ sha1.o plist.o
lib-$(CONFIG_SMP) += cpumask.o
@@ -26,7 +26,6 @@ lib-$(CONFIG_SEMAPHORE_SLEEPERS) += sema
lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
-obj-$(CONFIG_PLIST) += plist.o
obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list.o
--
^ permalink raw reply [flat|nested] 21+ messages in thread* [PATCH 04/10] -mm clocksource: add some new API calls
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (2 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 03/10] -mm clocksource: enable plist dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 19:06 ` john stultz
2006-08-04 3:24 ` [PATCH 05/10] -mm clocksource: convert generic timeofday dwalker
` (5 subsequent siblings)
9 siblings, 1 reply; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_user_api.patch --]
[-- Type: text/plain, Size: 11267 bytes --]
This introduces some new API calls,
- clocksource_get_clock()
Allows a clock lookup by name.
- clocksource_rating_change()
Used by a clocksource to signal a rating change. Replaces
clocksource_reselect()
I also moved the clock source list to a plist, which removes some lookup
overhead in the average case.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
arch/i386/kernel/tsc.c | 2
include/linux/clocksource.h | 45 ++++++++++++-
kernel/time/clocksource.c | 149 ++++++++++++++++++++++++++++----------------
3 files changed, 139 insertions(+), 57 deletions(-)
Index: linux-2.6.17/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.17/arch/i386/kernel/tsc.c
@@ -351,7 +351,7 @@ static int tsc_update_callback(void)
/* check to see if we should switch to the safe clocksource: */
if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
clocksource_tsc.rating = 50;
- clocksource_reselect();
+ clocksource_rating_change(&clocksource_tsc);
change = 1;
}
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -12,12 +12,20 @@
#include <linux/timex.h>
#include <linux/time.h>
#include <linux/list.h>
+#include <linux/plist.h>
+#include <linux/sysdev.h>
#include <asm/div64.h>
#include <asm/io.h>
/* clocksource cycle base type */
typedef u64 cycle_t;
+/*
+ * This is the only generic clock, it should be used
+ * for early initialization.
+ */
+extern struct clocksource clocksource_jiffies;
+
/**
* struct clocksource - hardware abstraction for a free running counter
* Provides mostly state-free accessors to the underlying hardware.
@@ -51,7 +59,7 @@ typedef u64 cycle_t;
*/
struct clocksource {
char *name;
- struct list_head list;
+ struct plist_node list;
int rating;
cycle_t (*read)(void);
cycle_t mask;
@@ -148,6 +156,25 @@ static inline s64 cyc2ns(struct clocksou
}
/**
+ * ns2cyc - converts nanoseconds to clocksource cycles
+ * @cs: Pointer to clocksource
+ * @ns: Nanoseconds
+ *
+ * Uses the clocksource to convert nanoseconds back to cycles.
+ *
+ * XXX - This could use some mult_lxl_ll() asm optimization
+ */
+static inline cycle_t ns2cyc(struct clocksource *cs, s64 ns)
+{
+ u64 ret = ns;
+
+ ret <<= cs->shift;
+ do_div(ret, cs->mult);
+
+ return ret;
+}
+
+/**
* clocksource_calculate_interval - Calculates a clocksource interval struct
*
* @c: Pointer to clocksource.
@@ -178,8 +205,18 @@ static inline void clocksource_calculate
/* used to install a new clocksource */
-int clocksource_register(struct clocksource*);
-void clocksource_reselect(void);
-struct clocksource* clocksource_get_next(void);
+extern int clocksource_register(struct clocksource*);
+extern void clocksource_rating_change(struct clocksource*);
+extern struct clocksource * clocksource_get_clock(char*);
+/**
+ * clocksource_get_best_clock - Finds highest rated clocksource
+ *
+ * Returns the highest rated clocksource. If none are register the
+ * jiffies clock is returned.
+ */
+static inline struct clocksource * clocksource_get_best_clock(void)
+{
+ return clocksource_get_clock(NULL);
+}
#endif /* _LINUX_CLOCKSOURCE_H */
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -32,13 +32,18 @@
/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;
+/*
+ * Internally used to invert the rating, so lower is better.
+ */
+#define CLOCKSOURCE_RATING(x) (INT_MAX-x)
+
/*[Clocksource internal variables]---------
* curr_clocksource:
* currently selected clocksource. Initialized to clocksource_jiffies.
* next_clocksource:
* pending next selected clocksource.
* clocksource_list:
- * linked list with the registered clocksources
+ * priority list with the registered clocksources
* clocksource_lock:
* protects manipulations to curr_clocksource and next_clocksource
* and the clocksource_list
@@ -47,7 +52,8 @@ extern struct clocksource clocksource_ji
*/
static struct clocksource *curr_clocksource = &clocksource_jiffies;
static struct clocksource *next_clocksource;
-static LIST_HEAD(clocksource_list);
+static struct plist_head clocksource_list =
+ PLIST_HEAD_INIT(clocksource_list, clocksource_lock);
static DEFINE_SPINLOCK(clocksource_lock);
static char override_name[32];
@@ -70,84 +76,111 @@ struct clocksource *clocksource_get_next
}
/**
- * select_clocksource - Finds the best registered clocksource.
+ * __is_registered - Returns a clocksource if it's registered
+ * @name: name of the clocksource to return
*
* Private function. Must hold clocksource_lock when called.
*
- * Looks through the list of registered clocksources, returning
- * the one with the highest rating value. If there is a clocksource
- * name that matches the override string, it returns that clocksource.
+ * Returns the clocksource if registered, zero otherwise.
+ * If no clocksources are registered the jiffies clock is
+ * returned.
*/
-static struct clocksource *select_clocksource(void)
+static struct clocksource * __is_registered(char * name)
{
- struct clocksource *best = NULL;
- struct list_head *tmp;
+ struct plist_node *tmp;
- list_for_each(tmp, &clocksource_list) {
+ plist_for_each(tmp, &clocksource_list) {
struct clocksource *src;
src = list_entry(tmp, struct clocksource, list);
- if (!best)
- best = src;
-
- /* check for override: */
- if (strlen(src->name) == strlen(override_name) &&
- !strcmp(src->name, override_name)) {
- best = src;
- break;
- }
- /* pick the highest rating: */
- if (src->rating > best->rating)
- best = src;
+ if (!strcmp(src->name, name))
+ return src;
}
- return best;
+ return 0;
}
/**
- * is_registered_source - Checks if clocksource is registered
- * @c: pointer to a clocksource
+ * __get_clock - Finds a specific clocksource
+ * @name: name of the clocksource to return
*
- * Private helper function. Must hold clocksource_lock when called.
+ * Private function. Must hold clocksource_lock when called.
*
- * Returns one if the clocksource is already registered, zero otherwise.
+ * Returns the clocksource if registered, zero otherwise.
+ * If the @name is null the highest rated clock is returned.
*/
-static int is_registered_source(struct clocksource *c)
+static inline struct clocksource * __get_clock(char * name)
{
- int len = strlen(c->name);
- struct list_head *tmp;
- list_for_each(tmp, &clocksource_list) {
- struct clocksource *src;
+ if (unlikely(plist_head_empty(&clocksource_list)))
+ return &clocksource_jiffies;
- src = list_entry(tmp, struct clocksource, list);
- if (strlen(src->name) == len && !strcmp(src->name, c->name))
- return 1;
- }
+ if (!name)
+ return plist_first_entry(&clocksource_list, struct clocksource,
+ list);
- return 0;
+ return __is_registered(name);
+}
+
+/**
+ * clocksource_get_clock - Finds a specific clocksource
+ * @name: name of the clocksource to return
+ *
+ * Returns the clocksource if registered, zero otherwise.
+ */
+struct clocksource * clocksource_get_clock(char * name)
+{
+ struct clocksource * ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&clocksource_lock, flags);
+ ret = __get_clock(name);
+ spin_unlock_irqrestore(&clocksource_lock, flags);
+ return ret;
+}
+
+
+/**
+ * select_clocksource - Finds the best registered clocksource.
+ *
+ * Private function. Must hold clocksource_lock when called.
+ *
+ * Looks through the list of registered clocksources, returning
+ * the one with the highest rating value. If there is a clocksource
+ * name that matches the override string, it returns that clocksource.
+ */
+static struct clocksource *select_clocksource(void)
+{
+ if (!*override_name)
+ return plist_first_entry(&clocksource_list, struct clocksource,
+ list);
+ return get_clock(override_name);
}
/**
* clocksource_register - Used to install new clocksources
* @t: clocksource to be registered
*
- * Returns -EBUSY if registration fails, zero otherwise.
+ * Returns -EBUSY clock is already registered,
+ * Returns -EINVAL if clocksource is invalid,
+ * Return zero otherwise.
*/
int clocksource_register(struct clocksource *c)
{
int ret = 0;
unsigned long flags;
+ if (unlikely(!c))
+ return -EINVAL;
+
spin_lock_irqsave(&clocksource_lock, flags);
- /* check if clocksource is already registered */
- if (is_registered_source(c)) {
- printk("register_clocksource: Cannot register %s. "
+ if (unlikely(!plist_node_empty(&c->list) && __is_registered(c->name))) {
+ printk("register_clocksource: Cannot register %s clocksource. "
"Already registered!", c->name);
ret = -EBUSY;
} else {
- /* register it */
- list_add(&c->list, &clocksource_list);
+ plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
+ plist_add(&c->list, &clocksource_list);
/* scan the registered clocksources, and pick the best one */
next_clocksource = select_clocksource();
}
@@ -157,21 +190,32 @@ int clocksource_register(struct clocksou
EXPORT_SYMBOL(clocksource_register);
/**
- * clocksource_reselect - Rescan list for next clocksource
+ * clocksource_rating_change - Allows dynamic rating changes for register
+ * clocksources.
*
- * A quick helper function to be used if a clocksource changes its
- * rating. Forces the clocksource list to be re-scanned for the best
- * clocksource.
+ * Signals that a clocksource is dynamically changing it's rating.
+ * This could happen if a clocksource becomes more/less stable.
*/
-void clocksource_reselect(void)
+void clocksource_rating_change(struct clocksource *c)
{
unsigned long flags;
+ if (unlikely(plist_node_empty(&c->list)))
+ return;
+
spin_lock_irqsave(&clocksource_lock, flags);
+
+ /*
+ * Re-register the clocksource with it's new rating.
+ */
+ plist_del(&c->list, &clocksource_list);
+ plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
+ plist_add(&c->list, &clocksource_list);
+
next_clocksource = select_clocksource();
spin_unlock_irqrestore(&clocksource_lock, flags);
}
-EXPORT_SYMBOL(clocksource_reselect);
+EXPORT_SYMBOL(clocksource_rating_change);
/**
* sysfs_show_current_clocksources - sysfs interface for current clocksource
@@ -236,16 +280,17 @@ static ssize_t sysfs_override_clocksourc
* @dev: unused
* @buf: char buffer to be filled with clocksource list
*
- * Provides sysfs interface for listing registered clocksources
+ * Provides sysfs interface for listing registered clocksources.
+ * Output in priority sorted order.
*/
static ssize_t
sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
{
- struct list_head *tmp;
+ struct plist_node *tmp;
char *curr = buf;
spin_lock_irq(&clocksource_lock);
- list_for_each(tmp, &clocksource_list) {
+ plist_for_each(tmp, &clocksource_list) {
struct clocksource *src;
src = list_entry(tmp, struct clocksource, list);
--
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH 04/10] -mm clocksource: add some new API calls
2006-08-04 3:24 ` [PATCH 04/10] -mm clocksource: add some new API calls dwalker
@ 2006-08-04 19:06 ` john stultz
2006-08-04 19:28 ` Daniel Walker
0 siblings, 1 reply; 21+ messages in thread
From: john stultz @ 2006-08-04 19:06 UTC (permalink / raw)
To: dwalker; +Cc: akpm, linux-kernel
On Thu, 2006-08-03 at 20:24 -0700, dwalker@mvista.com wrote:
> plain text document attachment (clocksource_user_api.patch)
> This introduces some new API calls,
>
> - clocksource_get_clock()
> Allows a clock lookup by name.
> - clocksource_rating_change()
> Used by a clocksource to signal a rating change. Replaces
> clocksource_reselect()
>
> I also moved the clock source list to a plist, which removes some lookup
> overhead in the average case.
>
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>
>
> ---
> arch/i386/kernel/tsc.c | 2
> include/linux/clocksource.h | 45 ++++++++++++-
> kernel/time/clocksource.c | 149 ++++++++++++++++++++++++++++----------------
> 3 files changed, 139 insertions(+), 57 deletions(-)
>
> Index: linux-2.6.17/arch/i386/kernel/tsc.c
> ===================================================================
> --- linux-2.6.17.orig/arch/i386/kernel/tsc.c
> +++ linux-2.6.17/arch/i386/kernel/tsc.c
> @@ -351,7 +351,7 @@ static int tsc_update_callback(void)
> /* check to see if we should switch to the safe clocksource: */
> if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
> clocksource_tsc.rating = 50;
> - clocksource_reselect();
> + clocksource_rating_change(&clocksource_tsc);
> change = 1;
> }
>
> Index: linux-2.6.17/include/linux/clocksource.h
> ===================================================================
> --- linux-2.6.17.orig/include/linux/clocksource.h
> +++ linux-2.6.17/include/linux/clocksource.h
> @@ -12,12 +12,20 @@
> #include <linux/timex.h>
> #include <linux/time.h>
> #include <linux/list.h>
> +#include <linux/plist.h>
> +#include <linux/sysdev.h>
> #include <asm/div64.h>
> #include <asm/io.h>
>
> /* clocksource cycle base type */
> typedef u64 cycle_t;
>
> +/*
> + * This is the only generic clock, it should be used
> + * for early initialization.
> + */
> +extern struct clocksource clocksource_jiffies;
> +
> /**
> * struct clocksource - hardware abstraction for a free running counter
> * Provides mostly state-free accessors to the underlying hardware.
> @@ -51,7 +59,7 @@ typedef u64 cycle_t;
> */
> struct clocksource {
> char *name;
> - struct list_head list;
> + struct plist_node list;
> int rating;
> cycle_t (*read)(void);
> cycle_t mask;
> @@ -148,6 +156,25 @@ static inline s64 cyc2ns(struct clocksou
> }
>
> /**
> + * ns2cyc - converts nanoseconds to clocksource cycles
> + * @cs: Pointer to clocksource
> + * @ns: Nanoseconds
> + *
> + * Uses the clocksource to convert nanoseconds back to cycles.
> + *
> + * XXX - This could use some mult_lxl_ll() asm optimization
> + */
> +static inline cycle_t ns2cyc(struct clocksource *cs, s64 ns)
> +{
> + u64 ret = ns;
> +
> + ret <<= cs->shift;
> + do_div(ret, cs->mult);
> +
> + return ret;
> +}
> +
> +/**
> * clocksource_calculate_interval - Calculates a clocksource interval struct
> *
> * @c: Pointer to clocksource.
> @@ -178,8 +205,18 @@ static inline void clocksource_calculate
>
>
> /* used to install a new clocksource */
> -int clocksource_register(struct clocksource*);
> -void clocksource_reselect(void);
> -struct clocksource* clocksource_get_next(void);
> +extern int clocksource_register(struct clocksource*);
> +extern void clocksource_rating_change(struct clocksource*);
> +extern struct clocksource * clocksource_get_clock(char*);
>
> +/**
> + * clocksource_get_best_clock - Finds highest rated clocksource
> + *
> + * Returns the highest rated clocksource. If none are register the
> + * jiffies clock is returned.
> + */
> +static inline struct clocksource * clocksource_get_best_clock(void)
> +{
> + return clocksource_get_clock(NULL);
> +}
> #endif /* _LINUX_CLOCKSOURCE_H */
> Index: linux-2.6.17/kernel/time/clocksource.c
> ===================================================================
> --- linux-2.6.17.orig/kernel/time/clocksource.c
> +++ linux-2.6.17/kernel/time/clocksource.c
> @@ -32,13 +32,18 @@
> /* XXX - Would like a better way for initializing curr_clocksource */
> extern struct clocksource clocksource_jiffies;
>
> +/*
> + * Internally used to invert the rating, so lower is better.
> + */
> +#define CLOCKSOURCE_RATING(x) (INT_MAX-x)
Since this is used for the plist bits, could it get a more explicit
name?
> /*[Clocksource internal variables]---------
> * curr_clocksource:
> * currently selected clocksource. Initialized to clocksource_jiffies.
> * next_clocksource:
> * pending next selected clocksource.
> * clocksource_list:
> - * linked list with the registered clocksources
> + * priority list with the registered clocksources
> * clocksource_lock:
> * protects manipulations to curr_clocksource and next_clocksource
> * and the clocksource_list
> @@ -47,7 +52,8 @@ extern struct clocksource clocksource_ji
> */
> static struct clocksource *curr_clocksource = &clocksource_jiffies;
> static struct clocksource *next_clocksource;
> -static LIST_HEAD(clocksource_list);
> +static struct plist_head clocksource_list =
> + PLIST_HEAD_INIT(clocksource_list, clocksource_lock);
> static DEFINE_SPINLOCK(clocksource_lock);
> static char override_name[32];
>
> @@ -70,84 +76,111 @@ struct clocksource *clocksource_get_next
> }
>
> /**
> - * select_clocksource - Finds the best registered clocksource.
> + * __is_registered - Returns a clocksource if it's registered
> + * @name: name of the clocksource to return
> *
> * Private function. Must hold clocksource_lock when called.
> *
> - * Looks through the list of registered clocksources, returning
> - * the one with the highest rating value. If there is a clocksource
> - * name that matches the override string, it returns that clocksource.
> + * Returns the clocksource if registered, zero otherwise.
> + * If no clocksources are registered the jiffies clock is
> + * returned.
> */
> -static struct clocksource *select_clocksource(void)
> +static struct clocksource * __is_registered(char * name)
> {
> - struct clocksource *best = NULL;
> - struct list_head *tmp;
> + struct plist_node *tmp;
>
> - list_for_each(tmp, &clocksource_list) {
> + plist_for_each(tmp, &clocksource_list) {
> struct clocksource *src;
>
> src = list_entry(tmp, struct clocksource, list);
> - if (!best)
> - best = src;
> -
> - /* check for override: */
> - if (strlen(src->name) == strlen(override_name) &&
> - !strcmp(src->name, override_name)) {
> - best = src;
> - break;
> - }
> - /* pick the highest rating: */
> - if (src->rating > best->rating)
> - best = src;
> + if (!strcmp(src->name, name))
> + return src;
> }
>
> - return best;
> + return 0;
> }
>
> /**
> - * is_registered_source - Checks if clocksource is registered
> - * @c: pointer to a clocksource
> + * __get_clock - Finds a specific clocksource
> + * @name: name of the clocksource to return
> *
> - * Private helper function. Must hold clocksource_lock when called.
> + * Private function. Must hold clocksource_lock when called.
> *
> - * Returns one if the clocksource is already registered, zero otherwise.
> + * Returns the clocksource if registered, zero otherwise.
> + * If the @name is null the highest rated clock is returned.
> */
> -static int is_registered_source(struct clocksource *c)
> +static inline struct clocksource * __get_clock(char * name)
> {
> - int len = strlen(c->name);
> - struct list_head *tmp;
>
> - list_for_each(tmp, &clocksource_list) {
> - struct clocksource *src;
> + if (unlikely(plist_head_empty(&clocksource_list)))
> + return &clocksource_jiffies;
>
> - src = list_entry(tmp, struct clocksource, list);
> - if (strlen(src->name) == len && !strcmp(src->name, c->name))
> - return 1;
> - }
> + if (!name)
> + return plist_first_entry(&clocksource_list, struct clocksource,
> + list);
>
> - return 0;
> + return __is_registered(name);
> +}
> +
> +/**
> + * clocksource_get_clock - Finds a specific clocksource
> + * @name: name of the clocksource to return
> + *
> + * Returns the clocksource if registered, zero otherwise.
> + */
> +struct clocksource * clocksource_get_clock(char * name)
> +{
> + struct clocksource * ret;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&clocksource_lock, flags);
> + ret = __get_clock(name);
> + spin_unlock_irqrestore(&clocksource_lock, flags);
> + return ret;
> +}
> +
> +
> +/**
> + * select_clocksource - Finds the best registered clocksource.
> + *
> + * Private function. Must hold clocksource_lock when called.
> + *
> + * Looks through the list of registered clocksources, returning
> + * the one with the highest rating value. If there is a clocksource
> + * name that matches the override string, it returns that clocksource.
> + */
> +static struct clocksource *select_clocksource(void)
> +{
> + if (!*override_name)
> + return plist_first_entry(&clocksource_list, struct clocksource,
> + list);
> + return get_clock(override_name);
> }
This all looks good.
> /**
> * clocksource_register - Used to install new clocksources
> * @t: clocksource to be registered
> *
> - * Returns -EBUSY if registration fails, zero otherwise.
> + * Returns -EBUSY clock is already registered,
> + * Returns -EINVAL if clocksource is invalid,
> + * Return zero otherwise.
> */
> int clocksource_register(struct clocksource *c)
> {
> int ret = 0;
> unsigned long flags;
>
> + if (unlikely(!c))
> + return -EINVAL;
> +
I'm not sure I understand the need for this. Is it really likely someone
would pass NULL to clocksource_register()?
> spin_lock_irqsave(&clocksource_lock, flags);
> - /* check if clocksource is already registered */
> - if (is_registered_source(c)) {
> - printk("register_clocksource: Cannot register %s. "
> + if (unlikely(!plist_node_empty(&c->list) && __is_registered(c->name))) {
> + printk("register_clocksource: Cannot register %s clocksource. "
> "Already registered!", c->name);
> ret = -EBUSY;
> } else {
> - /* register it */
> - list_add(&c->list, &clocksource_list);
> + plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
> + plist_add(&c->list, &clocksource_list);
> /* scan the registered clocksources, and pick the best one */
> next_clocksource = select_clocksource();
> }
> @@ -157,21 +190,32 @@ int clocksource_register(struct clocksou
> EXPORT_SYMBOL(clocksource_register);
>
> /**
> - * clocksource_reselect - Rescan list for next clocksource
> + * clocksource_rating_change - Allows dynamic rating changes for register
> + * clocksources.
> *
> - * A quick helper function to be used if a clocksource changes its
> - * rating. Forces the clocksource list to be re-scanned for the best
> - * clocksource.
> + * Signals that a clocksource is dynamically changing it's rating.
> + * This could happen if a clocksource becomes more/less stable.
> */
> -void clocksource_reselect(void)
> +void clocksource_rating_change(struct clocksource *c)
> {
> unsigned long flags;
>
> + if (unlikely(plist_node_empty(&c->list)))
> + return;
> +
> spin_lock_irqsave(&clocksource_lock, flags);
> +
> + /*
> + * Re-register the clocksource with it's new rating.
> + */
> + plist_del(&c->list, &clocksource_list);
> + plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
> + plist_add(&c->list, &clocksource_list);
> +
> next_clocksource = select_clocksource();
> spin_unlock_irqrestore(&clocksource_lock, flags);
> }
> -EXPORT_SYMBOL(clocksource_reselect);
> +EXPORT_SYMBOL(clocksource_rating_change);
>
> /**
> * sysfs_show_current_clocksources - sysfs interface for current clocksource
> @@ -236,16 +280,17 @@ static ssize_t sysfs_override_clocksourc
> * @dev: unused
> * @buf: char buffer to be filled with clocksource list
> *
> - * Provides sysfs interface for listing registered clocksources
> + * Provides sysfs interface for listing registered clocksources.
> + * Output in priority sorted order.
> */
> static ssize_t
> sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
> {
> - struct list_head *tmp;
> + struct plist_node *tmp;
> char *curr = buf;
>
> spin_lock_irq(&clocksource_lock);
> - list_for_each(tmp, &clocksource_list) {
> + plist_for_each(tmp, &clocksource_list) {
> struct clocksource *src;
>
> src = list_entry(tmp, struct clocksource, list);
>
No real objections to this one.
thanks
-john
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH 04/10] -mm clocksource: add some new API calls
2006-08-04 19:06 ` john stultz
@ 2006-08-04 19:28 ` Daniel Walker
2006-08-04 21:05 ` Thomas Gleixner
0 siblings, 1 reply; 21+ messages in thread
From: Daniel Walker @ 2006-08-04 19:28 UTC (permalink / raw)
To: john stultz; +Cc: akpm, linux-kernel
> > +/*
> > + * Internally used to invert the rating, so lower is better.
> > + */
> > +#define CLOCKSOURCE_RATING(x) (INT_MAX-x)
>
> Since this is used for the plist bits, could it get a more explicit
> name?
Sure, like CLOCKSOURCE_INVERT_RATING()
> > int clocksource_register(struct clocksource *c)
> > {
> > int ret = 0;
> > unsigned long flags;
> >
> > + if (unlikely(!c))
> > + return -EINVAL;
> > +
>
> I'm not sure I understand the need for this. Is it really likely someone
> would pass NULL to clocksource_register()?
Not likely, I was just covering all possibilities.. It might be better
as a BUG_ON() actually.
Daniel
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH 04/10] -mm clocksource: add some new API calls
2006-08-04 19:28 ` Daniel Walker
@ 2006-08-04 21:05 ` Thomas Gleixner
0 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2006-08-04 21:05 UTC (permalink / raw)
To: Daniel Walker; +Cc: john stultz, akpm, linux-kernel
On Fri, 2006-08-04 at 12:28 -0700, Daniel Walker wrote:
> > > int clocksource_register(struct clocksource *c)
> > > {
> > > int ret = 0;
> > > unsigned long flags;
> > >
> > > + if (unlikely(!c))
> > > + return -EINVAL;
> > > +
> >
> > I'm not sure I understand the need for this. Is it really likely someone
> > would pass NULL to clocksource_register()?
>
> Not likely, I was just covering all possibilities.. It might be better
> as a BUG_ON() actually.
BUG_ON is the only thing, which can be correct here. Registering a NULL
clocksource simply is a bug, but even the BUG_ON is overkill here.
tglx
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 05/10] -mm clocksource: convert generic timeofday
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (3 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 04/10] -mm clocksource: add some new API calls dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 3:24 ` [PATCH 06/10] -mm clocksource: add block notifier dwalker
` (4 subsequent siblings)
9 siblings, 0 replies; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_more_generic.patch --]
[-- Type: text/plain, Size: 15163 bytes --]
This patch shifts some of the code around so that the time
of day override happens inside kernel/timer.c.
The biggest timeofday changes are in update_wall_time() and
change_clocksource(). I removed the unconditional call to
change_clocksource(), and replaced it with a single atomic
check. The atomic is asserted only when a clock change is
needed. update_callback is no longer driven from
update_wall_time().
The fast path is now a single atomic check.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 183 +++++---------------------------------------
kernel/timer.c | 162 +++++++++++++++++++++++++++++++++-----
3 files changed, 164 insertions(+), 183 deletions(-)
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -206,6 +206,8 @@ static inline void clocksource_calculate
/* used to install a new clocksource */
extern int clocksource_register(struct clocksource*);
+extern int clocksource_sysfs_register(struct sysdev_attribute*);
+extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
extern void clocksource_rating_change(struct clocksource*);
extern struct clocksource * clocksource_get_clock(char*);
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -5,6 +5,8 @@
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
*
+ * Copyright (C) 2006 MontaVista Daniel Walker (dwalker@mvista.com)
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -21,7 +23,6 @@
*
* TODO WishList:
* o Allow clocksource drivers to be unregistered
- * o get rid of clocksource_jiffies extern
*/
#include <linux/clocksource.h>
@@ -29,51 +30,20 @@
#include <linux/init.h>
#include <linux/module.h>
-/* XXX - Would like a better way for initializing curr_clocksource */
-extern struct clocksource clocksource_jiffies;
-
/*
* Internally used to invert the rating, so lower is better.
*/
#define CLOCKSOURCE_RATING(x) (INT_MAX-x)
/*[Clocksource internal variables]---------
- * curr_clocksource:
- * currently selected clocksource. Initialized to clocksource_jiffies.
- * next_clocksource:
- * pending next selected clocksource.
* clocksource_list:
* priority list with the registered clocksources
* clocksource_lock:
- * protects manipulations to curr_clocksource and next_clocksource
- * and the clocksource_list
- * override_name:
- * Name of the user-specified clocksource.
+ * protects manipulations to the clocksource_list
*/
-static struct clocksource *curr_clocksource = &clocksource_jiffies;
-static struct clocksource *next_clocksource;
static struct plist_head clocksource_list =
PLIST_HEAD_INIT(clocksource_list, clocksource_lock);
static DEFINE_SPINLOCK(clocksource_lock);
-static char override_name[32];
-
-/**
- * clocksource_get_next - Returns the selected clocksource
- *
- */
-struct clocksource *clocksource_get_next(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&clocksource_lock, flags);
- if (next_clocksource) {
- curr_clocksource = next_clocksource;
- next_clocksource = NULL;
- }
- spin_unlock_irqrestore(&clocksource_lock, flags);
-
- return curr_clocksource;
-}
/**
* __is_registered - Returns a clocksource if it's registered
@@ -139,24 +109,6 @@ struct clocksource * clocksource_get_clo
return ret;
}
-
-/**
- * select_clocksource - Finds the best registered clocksource.
- *
- * Private function. Must hold clocksource_lock when called.
- *
- * Looks through the list of registered clocksources, returning
- * the one with the highest rating value. If there is a clocksource
- * name that matches the override string, it returns that clocksource.
- */
-static struct clocksource *select_clocksource(void)
-{
- if (!*override_name)
- return plist_first_entry(&clocksource_list, struct clocksource,
- list);
- return get_clock(override_name);
-}
-
/**
* clocksource_register - Used to install new clocksources
* @t: clocksource to be registered
@@ -181,8 +133,6 @@ int clocksource_register(struct clocksou
} else {
plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
plist_add(&c->list, &clocksource_list);
- /* scan the registered clocksources, and pick the best one */
- next_clocksource = select_clocksource();
}
spin_unlock_irqrestore(&clocksource_lock, flags);
return ret;
@@ -212,70 +162,12 @@ void clocksource_rating_change(struct cl
plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
plist_add(&c->list, &clocksource_list);
- next_clocksource = select_clocksource();
+ /* XXX: Add block notifier to signal new rating */
spin_unlock_irqrestore(&clocksource_lock, flags);
}
EXPORT_SYMBOL(clocksource_rating_change);
/**
- * sysfs_show_current_clocksources - sysfs interface for current clocksource
- * @dev: unused
- * @buf: char buffer to be filled with clocksource list
- *
- * Provides sysfs interface for listing current clocksource.
- */
-static ssize_t
-sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
-{
- char *curr = buf;
-
- spin_lock_irq(&clocksource_lock);
- curr += sprintf(curr, "%s ", curr_clocksource->name);
- spin_unlock_irq(&clocksource_lock);
-
- curr += sprintf(curr, "\n");
-
- return curr - buf;
-}
-
-/**
- * sysfs_override_clocksource - interface for manually overriding clocksource
- * @dev: unused
- * @buf: name of override clocksource
- * @count: length of buffer
- *
- * Takes input from sysfs interface for manually overriding the default
- * clocksource selction.
- */
-static ssize_t sysfs_override_clocksource(struct sys_device *dev,
- const char *buf, size_t count)
-{
- size_t ret = count;
- /* strings from sysfs write are not 0 terminated! */
- if (count >= sizeof(override_name))
- return -EINVAL;
-
- /* strip of \n: */
- if (buf[count-1] == '\n')
- count--;
- if (count < 1)
- return -EINVAL;
-
- spin_lock_irq(&clocksource_lock);
-
- /* copy the name given: */
- memcpy(override_name, buf, count);
- override_name[count] = 0;
-
- /* try to select it: */
- next_clocksource = select_clocksource();
-
- spin_unlock_irq(&clocksource_lock);
-
- return ret;
-}
-
-/**
* sysfs_show_available_clocksources - sysfs interface for listing clocksource
* @dev: unused
* @buf: char buffer to be filled with clocksource list
@@ -304,11 +196,8 @@ sysfs_show_available_clocksources(struct
}
/*
- * Sysfs setup bits:
+ * Generic sysfs setup bits:
*/
-static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
- sysfs_override_clocksource);
-
static SYSDEV_ATTR(available_clocksource, 0600,
sysfs_show_available_clocksources, NULL);
@@ -321,6 +210,21 @@ static struct sys_device device_clocksou
.cls = &clocksource_sysclass,
};
+/**
+ * clocksource_sysfs_register - interface to register a sysfs
+ * hook under the clocksource sys_device.
+ * @attr: sysdev_attribute created with the SYSDEV_ATTR macro.
+ *
+ * This function should be used to create a sysfs file under
+ * the clocksource directory which will be used to show the current
+ * clock used by the code calling clocksource_sysfs_register(), and
+ * set a specific override when written to.
+ */
+int clocksource_sysfs_register(struct sysdev_attribute * attr)
+{
+ return sysdev_create_file(&device_clocksource, attr);
+}
+
static int __init init_clocksource_sysfs(void)
{
int error = sysdev_class_register(&clocksource_sysclass);
@@ -330,52 +234,7 @@ static int __init init_clocksource_sysfs
if (!error)
error = sysdev_create_file(
&device_clocksource,
- &attr_current_clocksource);
- if (!error)
- error = sysdev_create_file(
- &device_clocksource,
&attr_available_clocksource);
return error;
}
-
-device_initcall(init_clocksource_sysfs);
-
-/**
- * boot_override_clocksource - boot clock override
- * @str: override name
- *
- * Takes a clocksource= boot argument and uses it
- * as the clocksource override name.
- */
-static int __init boot_override_clocksource(char* str)
-{
- unsigned long flags;
- spin_lock_irqsave(&clocksource_lock, flags);
- if (str)
- strlcpy(override_name, str, sizeof(override_name));
- spin_unlock_irqrestore(&clocksource_lock, flags);
- return 1;
-}
-
-__setup("clocksource=", boot_override_clocksource);
-
-/**
- * boot_override_clock - Compatibility layer for deprecated boot option
- * @str: override name
- *
- * DEPRECATED! Takes a clock= boot argument and uses it
- * as the clocksource override name
- */
-static int __init boot_override_clock(char* str)
-{
- if (!strcmp(str, "pmtmr")) {
- printk("Warning: clock=pmtmr is deprecated. "
- "Use clocksource=acpi_pm.\n");
- return boot_override_clocksource("acpi_pm");
- }
- printk("Warning! clock= boot option is deprecated. "
- "Use clocksource=xyz\n");
- return boot_override_clocksource(str);
-}
-
-__setup("clock=", boot_override_clock);
+postcore_initcall(init_clocksource_sysfs);
Index: linux-2.6.17/kernel/timer.c
===================================================================
--- linux-2.6.17.orig/kernel/timer.c
+++ linux-2.6.17/kernel/timer.c
@@ -17,6 +17,8 @@
* 2000-10-05 Implemented scalable SMP per-CPU timer handling.
* Copyright (C) 2000, 2001, 2002 Ingo Molnar
* Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
+ * 2006-08-03 Added usage of the generic clocksource API
+ * Copyright (C) 2006 MontaVista, Daniel Walker
*/
#include <linux/kernel_stat.h>
@@ -788,9 +790,15 @@ u64 current_tick_length(void)
/* XXX - all of this timekeeping code should be later moved to time.c */
#include <linux/clocksource.h>
-static struct clocksource *clock; /* pointer to current clocksource */
+/* pointer to current clocksource */
+static struct clocksource *clock = &clocksource_jiffies;
+static char clock_override_name[32];
+
+/* Interrupt update signaling variables */
+static atomic_t clock_check = ATOMIC_INIT(0);
#ifdef CONFIG_GENERIC_TIME
+
/**
* __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
*
@@ -910,29 +918,120 @@ EXPORT_SYMBOL(do_settimeofday);
*
* Accumulates current time interval and initializes new clocksource
*/
-static int change_clocksource(void)
+static int change_clocksource(char * override)
{
- struct clocksource *new;
- cycle_t now;
u64 nsec;
- new = clocksource_get_next();
- if (clock != new) {
- now = clocksource_read(new);
- nsec = __get_nsec_offset();
- timespec_add_ns(&xtime, nsec);
-
- clock = new;
- clock->cycle_last = now;
- printk(KERN_INFO "Time: %s clocksource has been installed.\n",
- clock->name);
- return 1;
- } else if (clock->update_callback) {
- return clock->update_callback();
+ cycle_t now;
+ struct clocksource *new = clocksource_get_clock(override);
+
+ now = clocksource_read(new);
+ nsec = __get_nsec_offset();
+ timespec_add_ns(&xtime, nsec);
+
+ clock = new;
+ clock->cycle_last = now;
+ printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+ clock->name);
+
+ return 1;
+}
+
+/**
+ * sysfs_show_current_clocksources - sysfs interface for current clocksource
+ * @dev: unused
+ * @buf: char buffer to be filled with clocksource list
+ *
+ * Provides sysfs interface for listing the current clocksource.
+ * Locking handled inside sysfs.
+ */
+static ssize_t
+sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%s \n", clock->name);
+}
+
+/**
+ * sysfs_override_clocksource - interface for manually overriding clocksource
+ * @dev: unused
+ * @buf: name of override clocksource
+ * @count: length of buffer
+ *
+ * Takes input from sysfs interface for manually overriding the default
+ * clocksource selection. Locking handled inside sysfs.
+ */
+static ssize_t sysfs_override_clocksource(struct sys_device *dev,
+ const char *buf, size_t count)
+{
+ size_t ret = count;
+
+ /*
+ * If there's already an update in progress then
+ * we can't proceed.
+ */
+ if (atomic_read(&clock_check))
+ return -EINVAL;
+
+ /* strings from sysfs write are not 0 terminated! */
+ if (count >= sizeof(clock_override_name))
+ return -EINVAL;
+
+ /* strip of \n: */
+ if (buf[count-1] == '\n')
+ count--;
+ if (count < 1)
+ return -EINVAL;
+
+ /* copy the name given: */
+ memcpy(clock_override_name, buf, count);
+ clock_override_name[count] = 0;
+
+ if (!clocksource_get_clock(clock_override_name)) {
+ clock_override_name[0] = 0;
+ return -EINVAL;
}
- return 0;
+
+ atomic_inc(&clock_check);
+
+ return ret;
}
+
+/*
+ * Sysfs attribute structure.
+ */
+static SYSDEV_ATTR(timeofday_clocksource, 0600, sysfs_show_current_clocksources,
+ sysfs_override_clocksource);
+
+/**
+ * boot_override_clocksource - boot clock override
+ * @str: override name
+ *
+ * Takes a clocksource= boot argument and uses it
+ * as the clocksource override name.
+ */
+static int __init boot_override_clocksource(char* str)
+{
+ if (str) {
+ /*
+ * Make sure the clock exists.
+ */
+ if (clocksource_get_clock(str))
+ strlcpy(clock_override_name, str,
+ sizeof(clock_override_name));
+ else {
+ printk("Time: requested clock \"%s\" doesn't exist\n",
+ str);
+ return 0;
+ }
+ }
+ /* Signal the interrupt to update. */
+ atomic_inc(&clock_check);
+
+ return 1;
+}
+__setup("timeofday_clocksource=", boot_override_clocksource);
+
#else
-#define change_clocksource() do { 0; } while(0)
+#define change_clocksource(x) do { 0; } while(0)
#endif
/**
@@ -961,7 +1060,7 @@ void __init timekeeping_init(void)
unsigned long flags;
write_seqlock_irqsave(&xtime_lock, flags);
- clock = clocksource_get_next();
+ clock = clocksource_get_best_clock();
clocksource_calculate_interval(clock, tick_nsec);
clock->cycle_last = clocksource_read(clock);
ntp_clear();
@@ -1018,6 +1117,11 @@ static int __init timekeeping_init_devic
int error = sysdev_class_register(&timekeeping_sysclass);
if (!error)
error = sysdev_register(&device_timer);
+
+#ifdef CONFIG_GENERIC_TIME
+ atomic_inc(&clock_check);
+ clocksource_sysfs_register(&attr_timeofday_clocksource);
+#endif
return error;
}
@@ -1161,10 +1265,26 @@ static void update_wall_time(void)
clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
/* check to see if there is a new clocksource to use */
- if (change_clocksource()) {
+ if (unlikely(atomic_read(&clock_check))) {
+
+ /*
+ * Switch to the new override clock, or the highest
+ * rated clock.
+ */
+ if (*clock_override_name)
+ change_clocksource(clock_override_name);
+ else
+ change_clocksource(NULL);
+
clock->error = 0;
clock->xtime_nsec = 0;
clocksource_calculate_interval(clock, tick_nsec);
+
+ /*
+ * Remove the change signal
+ */
+ atomic_dec(&clock_check);
+
}
}
--
^ permalink raw reply [flat|nested] 21+ messages in thread* [PATCH 06/10] -mm clocksource: add block notifier
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (4 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 05/10] -mm clocksource: convert generic timeofday dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 3:24 ` [PATCH 07/10] -mm clocksource: remove update_callback dwalker
` (3 subsequent siblings)
9 siblings, 0 replies; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_add_block_notify_on_new_clock.patch --]
[-- Type: text/plain, Size: 3921 bytes --]
Adds a callback interface for register/rating change
events.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
include/linux/clocksource.h | 9 +++++++++
kernel/time/clocksource.c | 19 ++++++++++++++++++-
kernel/timer.c | 12 ++++++++++++
3 files changed, 39 insertions(+), 1 deletion(-)
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -14,6 +14,7 @@
#include <linux/list.h>
#include <linux/plist.h>
#include <linux/sysdev.h>
+#include <linux/notifier.h>
#include <asm/div64.h>
#include <asm/io.h>
@@ -26,6 +27,12 @@ typedef u64 cycle_t;
*/
extern struct clocksource clocksource_jiffies;
+/*
+ * Block notifier flags.
+ */
+#define CLOCKSOURCE_NOTIFY_REGISTER 1
+#define CLOCKSOURCE_NOTIFY_RATING 2
+
/**
* struct clocksource - hardware abstraction for a free running counter
* Provides mostly state-free accessors to the underlying hardware.
@@ -206,6 +213,8 @@ static inline void clocksource_calculate
/* used to install a new clocksource */
extern int clocksource_register(struct clocksource*);
+extern int clocksource_notifier_register(struct notifier_block*);
+extern int clocksource_notifier_unregister(struct notifier_block*);
extern int clocksource_sysfs_register(struct sysdev_attribute*);
extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
extern void clocksource_rating_change(struct clocksource*);
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -44,6 +44,18 @@
static struct plist_head clocksource_list =
PLIST_HEAD_INIT(clocksource_list, clocksource_lock);
static DEFINE_SPINLOCK(clocksource_lock);
+static ATOMIC_NOTIFIER_HEAD(clocksource_list_notifier);
+
+/**
+ * clocksource_notifier_register - Registers a list change notifier
+ * @nb: pointer to a notifier block
+ *
+ * Returns zero always.
+ */
+int clocksource_notifier_register(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&clocksource_list_notifier, nb);
+}
/**
* __is_registered - Returns a clocksource if it's registered
@@ -135,6 +147,9 @@ int clocksource_register(struct clocksou
plist_add(&c->list, &clocksource_list);
}
spin_unlock_irqrestore(&clocksource_lock, flags);
+
+ atomic_notifier_call_chain(&clocksource_list_notifier,
+ CLOCKSOURCE_NOTIFY_REGISTER, c);
return ret;
}
EXPORT_SYMBOL(clocksource_register);
@@ -162,7 +177,9 @@ void clocksource_rating_change(struct cl
plist_node_init(&c->list, CLOCKSOURCE_RATING(c->rating));
plist_add(&c->list, &clocksource_list);
- /* XXX: Add block notifier to signal new rating */
+ atomic_notifier_call_chain(&clocksource_list_notifier,
+ CLOCKSOURCE_NOTIFY_RATING, c);
+
spin_unlock_irqrestore(&clocksource_lock, flags);
}
EXPORT_SYMBOL(clocksource_rating_change);
Index: linux-2.6.17/kernel/timer.c
===================================================================
--- linux-2.6.17.orig/kernel/timer.c
+++ linux-2.6.17/kernel/timer.c
@@ -1030,6 +1030,17 @@ static int __init boot_override_clocksou
}
__setup("timeofday_clocksource=", boot_override_clocksource);
+static int
+clocksource_callback(struct notifier_block *nb, unsigned long op, void *c)
+{
+ atomic_inc(&clock_check);
+ return 0;
+}
+
+static struct notifier_block clocksource_nb = {
+ .notifier_call = clocksource_callback,
+};
+
#else
#define change_clocksource(x) do { 0; } while(0)
#endif
@@ -1120,6 +1131,7 @@ static int __init timekeeping_init_devic
#ifdef CONFIG_GENERIC_TIME
atomic_inc(&clock_check);
+ clocksource_notifier_register(&clocksource_nb);
clocksource_sysfs_register(&attr_timeofday_clocksource);
#endif
return error;
--
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 07/10] -mm clocksource: remove update_callback
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (5 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 06/10] -mm clocksource: add block notifier dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 19:28 ` john stultz
2006-08-04 3:24 ` [PATCH 08/10] -mm clocksource: cleanup on -mm dwalker
` (2 subsequent siblings)
9 siblings, 1 reply; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_remove_update_callback.patch --]
[-- Type: text/plain, Size: 1971 bytes --]
With the new notifier block the update_callback becomes
obsolete.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
arch/i386/kernel/tsc.c | 5 +++--
include/linux/clocksource.h | 2 --
2 files changed, 3 insertions(+), 4 deletions(-)
Index: linux-2.6.17/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.17/arch/i386/kernel/tsc.c
@@ -60,9 +60,12 @@ static inline int check_tsc_unstable(voi
return tsc_unstable;
}
+static int tsc_update_callback(void);
void mark_tsc_unstable(void)
{
tsc_unstable = 1;
+
+ tsc_update_callback();
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
@@ -322,7 +325,6 @@ core_initcall(cpufreq_tsc);
/* clock source code */
static unsigned long current_tsc_khz = 0;
-static int tsc_update_callback(void);
static cycle_t read_tsc(void)
{
@@ -340,7 +342,6 @@ static struct clocksource clocksource_ts
.mask = CLOCKSOURCE_MASK(64),
.mult = 0, /* to be set */
.shift = 22,
- .update_callback = tsc_update_callback,
.is_continuous = 1,
};
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -60,7 +60,6 @@ extern struct clocksource clocksource_ji
* subtraction of non 64 bit counters
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
- * @update_callback: called when safe to alter clocksource values
* @is_continuous: defines if clocksource is free-running.
* @cycle_interval: Used internally by timekeeping core, please ignore.
* @xtime_interval: Used internally by timekeeping core, please ignore.
@@ -73,7 +72,6 @@ struct clocksource {
cycle_t mask;
u32 mult;
u32 shift;
- int (*update_callback)(void);
int is_continuous;
/* timekeeping specific data, ignore */
--
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 07/10] -mm clocksource: remove update_callback
2006-08-04 3:24 ` [PATCH 07/10] -mm clocksource: remove update_callback dwalker
@ 2006-08-04 19:28 ` john stultz
0 siblings, 0 replies; 21+ messages in thread
From: john stultz @ 2006-08-04 19:28 UTC (permalink / raw)
To: dwalker; +Cc: akpm, linux-kernel
On Thu, 2006-08-03 at 20:24 -0700, dwalker@mvista.com wrote:
> plain text document attachment
> (clocksource_remove_update_callback.patch)
> With the new notifier block the update_callback becomes
> obsolete.
>
> Signed-Off-By: Daniel Walker <dwalker@mvista.com>
> ---
> arch/i386/kernel/tsc.c | 5 +++--
> include/linux/clocksource.h | 2 --
> 2 files changed, 3 insertions(+), 4 deletions(-)
>
> Index: linux-2.6.17/arch/i386/kernel/tsc.c
> ===================================================================
> --- linux-2.6.17.orig/arch/i386/kernel/tsc.c
> +++ linux-2.6.17/arch/i386/kernel/tsc.c
> @@ -60,9 +60,12 @@ static inline int check_tsc_unstable(voi
> return tsc_unstable;
> }
>
> +static int tsc_update_callback(void);
> void mark_tsc_unstable(void)
> {
> tsc_unstable = 1;
> +
> + tsc_update_callback();
> }
> EXPORT_SYMBOL_GPL(mark_tsc_unstable);
Hmm. I'd like to keep mark_tsc_unstable to just be a flag rather than
the hook into the update_callback(), as it may be called quite
frequently.
I do agree we can kill the update_callback altogether, but we probably
need to do something like:
void mark_tsc_unstable(void)
{
if (unlikely(!tsc_unstable)) {
clocksource_tsc.rating = 50;
clocksource_rating_change(&clocksource_tsc);
}
tsc_unstable = 1;
}
> @@ -322,7 +325,6 @@ core_initcall(cpufreq_tsc);
> /* clock source code */
>
> static unsigned long current_tsc_khz = 0;
> -static int tsc_update_callback(void);
>
> static cycle_t read_tsc(void)
> {
> @@ -340,7 +342,6 @@ static struct clocksource clocksource_ts
> .mask = CLOCKSOURCE_MASK(64),
> .mult = 0, /* to be set */
> .shift = 22,
> - .update_callback = tsc_update_callback,
> .is_continuous = 1,
> };
>
> Index: linux-2.6.17/include/linux/clocksource.h
> ===================================================================
> --- linux-2.6.17.orig/include/linux/clocksource.h
> +++ linux-2.6.17/include/linux/clocksource.h
> @@ -60,7 +60,6 @@ extern struct clocksource clocksource_ji
> * subtraction of non 64 bit counters
> * @mult: cycle to nanosecond multiplier
> * @shift: cycle to nanosecond divisor (power of two)
> - * @update_callback: called when safe to alter clocksource values
> * @is_continuous: defines if clocksource is free-running.
> * @cycle_interval: Used internally by timekeeping core, please ignore.
> * @xtime_interval: Used internally by timekeeping core, please ignore.
> @@ -73,7 +72,6 @@ struct clocksource {
> cycle_t mask;
> u32 mult;
> u32 shift;
> - int (*update_callback)(void);
> int is_continuous;
>
> /* timekeeping specific data, ignore */
>
Other than the top bit I don't have an issue w/ this.
thanks
-john
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 08/10] -mm clocksource: cleanup on -mm
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (6 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 07/10] -mm clocksource: remove update_callback dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 19:53 ` john stultz
2006-08-04 3:24 ` [PATCH 09/10] -mm clocksource: initialize list value dwalker
2006-08-04 3:24 ` [PATCH 10/10] -mm clocksource: add generic sched_clock() dwalker
9 siblings, 1 reply; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksource_api_cleanup_on_mm.patch --]
[-- Type: text/plain, Size: 7214 bytes --]
Some additional clean up only on the -mm tree. Moves the adjust
functions into kernel/time/clocksource.c .
These functions directly modify the clocksource multiplier based
on ntp error. These adjustments will affect other users of that
clock. This hasn't been addressed in my patch set, since it
needs some discussion.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
include/linux/clocksource.h | 1
kernel/time/clocksource.c | 90 ++++++++++++++++++++++++++++++++++++++++++++
kernel/timer.c | 83 ----------------------------------------
3 files changed, 91 insertions(+), 83 deletions(-)
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -217,6 +217,7 @@ extern int clocksource_sysfs_register(st
extern void clocksource_sysfs_unregister(struct sysdev_attribute*);
extern void clocksource_rating_change(struct clocksource*);
extern struct clocksource * clocksource_get_clock(char*);
+extern void clocksource_adjust(struct clocksource *, s64);
/**
* clocksource_get_best_clock - Finds highest rated clocksource
Index: linux-2.6.17/kernel/time/clocksource.c
===================================================================
--- linux-2.6.17.orig/kernel/time/clocksource.c
+++ linux-2.6.17/kernel/time/clocksource.c
@@ -57,6 +57,96 @@ int clocksource_notifier_register(struct
return atomic_notifier_chain_register(&clocksource_list_notifier, nb);
}
+/*
+ * If the error is already larger, we look ahead even further
+ * to compensate for late or lost adjustments.
+ */
+static __always_inline int
+clocksource_bigadjust(struct clocksource *clock, s64 error, s64 *interval,
+ s64 *offset)
+{
+ s64 tick_error, i;
+ u32 look_ahead, adj;
+ s32 error2, mult;
+
+ /*
+ * Use the current error value to determine how much to look ahead.
+ * The larger the error the slower we adjust for it to avoid problems
+ * with losing too many ticks, otherwise we would overadjust and
+ * produce an even larger error. The smaller the adjustment the
+ * faster we try to adjust for it, as lost ticks can do less harm
+ * here. This is tuned so that an error of about 1 msec is adusted
+ * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
+ */
+ error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ);
+ error2 = abs(error2);
+ for (look_ahead = 0; error2 > 0; look_ahead++)
+ error2 >>= 2;
+
+ /*
+ * Now calculate the error in (1 << look_ahead) ticks, but first
+ * remove the single look ahead already included in the error.
+ */
+ tick_error = current_tick_length() >>
+ (TICK_LENGTH_SHIFT - clock->shift + 1);
+ tick_error -= clock->xtime_interval >> 1;
+ error = ((error - tick_error) >> look_ahead) + tick_error;
+
+ /* Finally calculate the adjustment shift value. */
+ i = *interval;
+ mult = 1;
+ if (error < 0) {
+ error = -error;
+ *interval = -*interval;
+ *offset = -*offset;
+ mult = -1;
+ }
+ for (adj = 0; error > i; adj++)
+ error >>= 1;
+
+ *interval <<= adj;
+ *offset <<= adj;
+ return mult << adj;
+}
+
+/*
+ * Adjust the multiplier to reduce the error value,
+ * this is optimized for the most common adjustments of -1,0,1,
+ * for other values we can do a bit more work.
+ */
+void clocksource_adjust(struct clocksource *clock, s64 offset)
+{
+ s64 error, interval = clock->cycle_interval;
+ int adj;
+
+ error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
+ if (error > interval) {
+ error >>= 2;
+ if (likely(error <= interval))
+ adj = 1;
+ else
+ adj = clocksource_bigadjust(clock, error, &interval,
+ &offset);
+ } else if (error < -interval) {
+ error >>= 2;
+ if (likely(error >= -interval)) {
+ adj = -1;
+ interval = -interval;
+ offset = -offset;
+ } else
+ adj = clocksource_bigadjust(clock, error, &interval,
+ &offset);
+ } else
+ return;
+
+ clock->mult += adj;
+ clock->xtime_interval += interval;
+ clock->xtime_nsec -= offset;
+ clock->error -= (interval - offset) <<
+ (TICK_LENGTH_SHIFT - clock->shift);
+}
+
+
/**
* __is_registered - Returns a clocksource if it's registered
* @name: name of the clocksource to return
Index: linux-2.6.17/kernel/timer.c
===================================================================
--- linux-2.6.17.orig/kernel/timer.c
+++ linux-2.6.17/kernel/timer.c
@@ -1140,89 +1140,6 @@ static int __init timekeeping_init_devic
device_initcall(timekeeping_init_device);
/*
- * If the error is already larger, we look ahead even further
- * to compensate for late or lost adjustments.
- */
-static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset)
-{
- s64 tick_error, i;
- u32 look_ahead, adj;
- s32 error2, mult;
-
- /*
- * Use the current error value to determine how much to look ahead.
- * The larger the error the slower we adjust for it to avoid problems
- * with losing too many ticks, otherwise we would overadjust and
- * produce an even larger error. The smaller the adjustment the
- * faster we try to adjust for it, as lost ticks can do less harm
- * here. This is tuned so that an error of about 1 msec is adusted
- * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
- */
- error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ);
- error2 = abs(error2);
- for (look_ahead = 0; error2 > 0; look_ahead++)
- error2 >>= 2;
-
- /*
- * Now calculate the error in (1 << look_ahead) ticks, but first
- * remove the single look ahead already included in the error.
- */
- tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
- tick_error -= clock->xtime_interval >> 1;
- error = ((error - tick_error) >> look_ahead) + tick_error;
-
- /* Finally calculate the adjustment shift value. */
- i = *interval;
- mult = 1;
- if (error < 0) {
- error = -error;
- *interval = -*interval;
- *offset = -*offset;
- mult = -1;
- }
- for (adj = 0; error > i; adj++)
- error >>= 1;
-
- *interval <<= adj;
- *offset <<= adj;
- return mult << adj;
-}
-
-/*
- * Adjust the multiplier to reduce the error value,
- * this is optimized for the most common adjustments of -1,0,1,
- * for other values we can do a bit more work.
- */
-static void clocksource_adjust(struct clocksource *clock, s64 offset)
-{
- s64 error, interval = clock->cycle_interval;
- int adj;
-
- error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
- if (error > interval) {
- error >>= 2;
- if (likely(error <= interval))
- adj = 1;
- else
- adj = clocksource_bigadjust(error, &interval, &offset);
- } else if (error < -interval) {
- error >>= 2;
- if (likely(error >= -interval)) {
- adj = -1;
- interval = -interval;
- offset = -offset;
- } else
- adj = clocksource_bigadjust(error, &interval, &offset);
- } else
- return;
-
- clock->mult += adj;
- clock->xtime_interval += interval;
- clock->xtime_nsec -= offset;
- clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
-}
-
-/*
* update_wall_time - Uses the current clocksource to increment the wall time
*
* Called from the timer interrupt, must hold a write on xtime_lock.
--
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/10] -mm clocksource: cleanup on -mm
2006-08-04 3:24 ` [PATCH 08/10] -mm clocksource: cleanup on -mm dwalker
@ 2006-08-04 19:53 ` john stultz
2006-08-04 21:11 ` Daniel Walker
0 siblings, 1 reply; 21+ messages in thread
From: john stultz @ 2006-08-04 19:53 UTC (permalink / raw)
To: dwalker; +Cc: akpm, linux-kernel, Roman Zippel
On Thu, 2006-08-03 at 20:24 -0700, dwalker@mvista.com wrote:
> plain text document attachment (clocksource_api_cleanup_on_mm.patch)
> Some additional clean up only on the -mm tree. Moves the adjust
> functions into kernel/time/clocksource.c .
>
> These functions directly modify the clocksource multiplier based
> on ntp error. These adjustments will effect other users of that
> clock. This hasn't been addressed in my patch set, since it
> needs some discussion.
Hmmmm. Yea, some additional discussion here would probably be needed
At the moment, I'd prefer to keep the clocksource_adjust bits with the
timekeeping code, however I'd also prefer to remove the timekeeping
specific fields (cycle_last, cycle_interval, xtime_nsec, xtime_interval,
error) from the clocksource structure and instead keep them in a
timekeeping specific structure (which may also point to a clocksource).
This would keep a clean separation between the clocksource's abstraction
that keeps as little state as possible and the timekeeping code's
internal state. However the point you bring up above is an interesting
issue: Do all users of the generic clocksource structure want the
clocksource to be NTP adjusted?
If we allow for non-ntp adjusted access to the clocksources, we may have
consistency issues between users comparing say sched_clock() and
clock_gettime() intervals. Further, if those users do want NTP adjusted
counters, why aren't they just using the timekeeping subsystem?
This does put some question as to what exactly would be the uses of the
clocksource structure outside of the timekeeping realm. Sure,
sched_clock() is a reasonable example, although since sched_clock has
such specific latency needs (we probably shouldn't go touching off-chip
hardware on every sched_clock call) and can be careful to avoid TSC skew
unlike the timekeeping code, its selection algorithm is going to be very
arch specific. So I'm not sure its really an ideal use of the
clocksource interface (as its not too difficult to just keep sched_clock
arch specific).
I do feel making the abstraction clean and generic is a good thing just
for code readability (and I very much appreciate your work here!), but
I'm not really sure that the need for clocksource access outside the
timekeeping subsystem has been well expressed. Do you have some other
examples other than sched_clock that might show further uses for this
abstraction?
thanks
-john
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/10] -mm clocksource: cleanup on -mm
2006-08-04 19:53 ` john stultz
@ 2006-08-04 21:11 ` Daniel Walker
2006-08-04 22:16 ` john stultz
0 siblings, 1 reply; 21+ messages in thread
From: Daniel Walker @ 2006-08-04 21:11 UTC (permalink / raw)
To: john stultz; +Cc: akpm, linux-kernel, Roman Zippel
On Fri, 2006-08-04 at 12:53 -0700, john stultz wrote:
>
> Hmmmm. Yea, some additional discussion here would probably be needed
>
> At the moment, I'd prefer to keep the clocksource_adjust bits with the
> timekeeping code, however I'd also prefer to remove the timekeeping
> specific fields (cycle_last, cycle_interval, xtime_nsec, xtime_interval,
> error) from the clocksource structure and instead keep them in a
> timekeeping specific structure (which may also point to a clocksource).
>
> This would keep a clean separation between the clocksource's abstraction
> that keeps as little state as possible and the timekeeping code's
> internal state. However the point you bring up above is an interesting
> issue: Do all users of the generic clocksource structure want the
> clocksource to be NTP adjusted?
Since the output from the clocksource is a lowlevel timestamp I don't
think the users of it would want it to be ntp adjusted. It would also be
a little odd, since the ntp adjustment would be attached only to a
single clock.
> If we allow for non-ntp adjusted access to the clocksources, we may have
> consistency issues between users comparing say sched_clock() and
> clock_gettime() intervals. Further, if those users do want NTP adjusted
> counters, why aren't they just using the timekeeping subsystem?
I imagine the users of the interface would be compartmentalized. Taking
sched_clock as an example the output is only compared to itself and not
to output from other interfaces.
> This does put some question as to what exactly would be the uses of the
> clocksource structure outside of the timekeeping realm. Sure,
> sched_clock() is a reasonable example, although since sched_clock has
> such specific latency needs (we probably shouldn't go touching off-chip
> hardware on every sched_clock call) and can be careful to avoid TSC skew
> unlike the timekeeping code, its selection algorithm is going to be very
> arch specific. So I'm not sure its really an ideal use of the
> clocksource interface (as its not too difficult to just keep sched_clock
> arch specific).
Part of the reason to have a generic sched_clock() (and the generic
clocksource interface in general) is to eliminate the inefficiency of
duplicating shift and mult functionality in each arch (and on ARM it's
per board). So if you correctly implement a clocksource structure for
your hardware you will at least expose a usable sched_clock() and
generic timeofday. Then if we add more users of the interface then more
functionality is exposed.
Another instance of this is when instrumentation needs a fast
low level timestamp. In the past to accomplish this one would need a per
arch change to read a clock, then potentially duplicate a shift and mult
type computation in order to convert to nanoseconds. One good example of
this is latency tracing in the -rt tree. I can imagine some good and
valid instrumentation having a long road to acceptance because the time
stamping portion would need to flow through several different arch and
potentially board maintainers.
I've also imagined that some usage of jiffies could be converted to use
this interface if it was appropriate. Since jiffies is hooked to the
tick, and the tick is getting more and more irregular, a clocksource
might be a relatively good replacement.
> I do feel making the abstraction clean and generic is a good thing just
> for code readability (and I very much appreciate your work here!), but
> I'm not really sure that the need for clocksource access outside the
> timekeeping subsystem has been well expressed. Do you have some other
> examples other then sched_clock that might show further uses for this
> abstraction?
I've converted latency tracing to an earlier version of the API , but I
don't have any other examples prepared. I think it's important to get
the API settled before I start converting anything else.
Daniel
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/10] -mm clocksource: cleanup on -mm
2006-08-04 21:11 ` Daniel Walker
@ 2006-08-04 22:16 ` john stultz
2006-08-04 23:16 ` Daniel Walker
0 siblings, 1 reply; 21+ messages in thread
From: john stultz @ 2006-08-04 22:16 UTC (permalink / raw)
To: Daniel Walker; +Cc: akpm, linux-kernel, Roman Zippel
On Fri, 2006-08-04 at 14:11 -0700, Daniel Walker wrote:
> On Fri, 2006-08-04 at 12:53 -0700, john stultz wrote:
> >
> > Hmmmm. Yea, some additional discussion here would probably be needed
> >
> > At the moment, I'd prefer to keep the clocksource_adjust bits with the
> > timekeeping code, however I'd also prefer to remove the timekeeping
> > specific fields (cycle_last, cycle_interval, xtime_nsec, xtime_interval,
> > error) from the clocksource structure and instead keep them in a
> > timekeeping specific structure (which may also point to a clocksource).
> >
> > This would keep a clean separation between the clocksource's abstraction
> > that keeps as little state as possible and the timekeeping code's
> > internal state. However the point you bring up above is an interesting
> > issue: Do all users of the generic clocksource structure want the
> > clocksource to be NTP adjusted?
>
> Since the output from the clocksource is a lowlevel timestamp I don't
> think the users of it would want it to be ntp adjusted. It would also be
> a little odd, since the ntp adjustment would be attached only to a
> single clock.
>
> > If we allow for non-ntp adjusted access to the clocksources, we may have
> > consistency issues between users comparing say sched_clock() and
> > clock_gettime() intervals. Further, if those users do want NTP adjusted
> > counters, why aren't they just using the timekeeping subsystem?
>
> I imagine the users of the interface would be compartmentalized. Taking
> sched_clock as an example the output is only compared to itself and not
> to output from other interfaces.
Agreed on both points. Although I suspect this point will need to be
made explicit.
> > This does put some question as to what exactly would be the uses of the
> > clocksource structure outside of the timekeeping realm. Sure,
> > sched_clock() is a reasonable example, although since sched_clock has
> > such specific latency needs (we probably shouldn't go touching off-chip
> > hardware on every sched_clock call) and can be careful to avoid TSC skew
> > unlike the timekeeping code, its selection algorithm is going to be very
> > arch specific. So I'm not sure its really an ideal use of the
> > clocksource interface (as its not too difficult to just keep sched_clock
> > arch specific).
>
> Part of the reason to have a generic sched_clock() (and the generic
> clocksource interface in general) is to eliminate the inefficienty of
> duplicating shift and mult functionality in each arch (and on ARM it's
> per board).
Well, a coherent accumulation and NTP adjustment method for continuous
clocksources was a big motivator for the timekeeping work. Also the
quantity of duplicated arch specific time code is a bit larger than the
sched_clock(), but that itself isn't a mark against utilizing
clocksources for sched_clock().
> So if you correctly implement a clocksource structure for
> your hardware you will at least expose a usable sched_clock() and
> generic timeofday. Then if we add more users of the interface then more
> functionality is exposed.
Well, this point might need some work. sched_clock has quite a different
correctness/performance tradeoff when compared against timeofday. If one
correctly implements a clocksource for something like the ACPI PM, I
doubt they'd want to use it for sched_clock (due to its ~1us access
latency). Additionally, since sched_clock doesn't require (for its
original purpose, at least) the TSC synchronization that is essential
for timekeeping, how will sched_clock determine which clocksource to use
on a system where the TSC is unsynced and marked bad?
> Another instances of this is when instrumentation is needing a of fast
> low level timestamp. In the past to accomplish this one would need a per
> arch change to read a clock, then potentially duplicate a shift and mult
> type computation in order to covert to nanosecond. One good example of
> this is latency tracing in the -rt tree. I can imagine some good and
> valid instrumentation having a long road of acceptable because the time
> stamping portion would need to flow through several different arch and
> potentially board maintainers.
This sounds reasonable, but also I'd question if sched_clock or
get_cycles would be appropriate here. Further, if the mult/shift cost is
acceptable, why not just use the timeofday as the cost will be similar.
> I've also imagined that some usage of jiffies could be converted to use
> this interface if it was appropriate. Since jiffies is hooked to the
> tick, and the tick is getting more and more irregular, a clocksource
> might be a relatively good replacement.
Hmmm. That'd be a harder sell for me. Probably would want those users to
move to the timeofday, or alternatively, drive jiffies off of the
timekeeping code rather than the interrupt handler to ensure it stays
synced (something I'm plotting once the timekeeping code settles down).
> > I do feel making the abstraction clean and generic is a good thing just
> > for code readability (and I very much appreciate your work here!), but
> > I'm not really sure that the need for clocksource access outside the
> > timekeeping subsystem has been well expressed. Do you have some other
> > examples other then sched_clock that might show further uses for this
> > abstraction?
>
> I've converted latency tracing to an earlier version of the API , but I
> don't have any other examples prepared. I think it's important to get
> the API settled before I start converting anything else.
Again, I think your patch set looks good for the most part (it's just the
last few bits I worry about). I'm very much interested to see where you
go with this, as I feel sched_clock (on i386 at least) needs some love
and attention and I'm excited to see new uses for the clocksource
abstraction. However, I do want to make sure that we think the use cases
out to avoid over-engineering the wrong bits.
thanks
-john
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/10] -mm clocksource: cleanup on -mm
2006-08-04 22:16 ` john stultz
@ 2006-08-04 23:16 ` Daniel Walker
0 siblings, 0 replies; 21+ messages in thread
From: Daniel Walker @ 2006-08-04 23:16 UTC (permalink / raw)
To: john stultz; +Cc: akpm, linux-kernel, Roman Zippel
On Fri, 2006-08-04 at 15:16 -0700, john stultz wrote:
> > I imagine the users of the interface would be compartmentalized. Taking
> > sched_clock as an example the output is only compared to itself and not
> > to output from other interfaces.
>
> Agreed on both points. Although I suspect this point will need to be
> made explicit.
Yeah, that's a good idea.
> > So if you correctly implement a clocksource structure for
> > your hardware you will at least expose a usable sched_clock() and
> > generic timeofday. Then if we add more users of the interface then more
> > functionality is exposed.
>
> Well, this point might need some work. sched_clock has quite a different
> correctness/performance tradeoff when compared against timeofday. If one
> correctly implements a clocksource for something like the ACPI PM, I
> doubt they'd want to use it for sched_clock (due to its ~1us access
> latency). Additionally, since sched_clock doesn't require (for its
> original purpose, at least) the TSC synchronization that is essential
> for timekeeping, how will sched_clock determine which clocksource to use
> on a system were the TSC is unsyched and marked bad?
sched_clock would use the highest rated clock in the system, and if that
becomes unstable it uses jiffies. That could mean using the acpi_pm if
it's the highest rated clock. Some code would have to be added to force
sched_clock to use the tsc.
I did some hackbench runs using the tsc vs. acpi_pm, and there were only
minimal differences (within the margin of error). It could be different
with other pm timers, but that was the result on mine. I also did some
tests of tsc vs. pit which showed some extensive differences. It added
10 seconds to "hackbench 80" . So I'm not entirely convinced that
acpi_pm is totally inappropriate as a fall back, in the case of
un-synced TSC.
I wish I had an HPET to test.
> > Another instances of this is when instrumentation is needing a of fast
> > low level timestamp. In the past to accomplish this one would need a per
> > arch change to read a clock, then potentially duplicate a shift and mult
> > type computation in order to covert to nanosecond. One good example of
> > this is latency tracing in the -rt tree. I can imagine some good and
> > valid instrumentation having a long road of acceptable because the time
> > stamping portion would need to flow through several different arch and
> > potentially board maintainers.
>
> This sounds reasonable, but also I'd question if sched_clock or
> get_cycles would be appropriate here. Further, if the mult/shift cost is
> acceptable, why not just use the timeofday as the cost will be similar.
get_cycles() isn't implemented on all arches. sched_clock() sometimes
returns jiffies converted to nanosecond depending on the arch (it does
this sometimes on i386 even). Also sched_clock() has the disadvantage of
converting to nanosecond each time it runs, which isn't always ideal.
get_cycles(), if it's implemented, doesn't come with a standard way to
find a) the clock it accesses b) the frequency of the clock.
So they both have disadvantages over the clocksource interface.
> > I've also imagined that some usage of jiffies could be converted to use
> > this interface if it was appropriate. Since jiffies is hooked to the
> > tick, and the tick is getting more and more irregular, a clocksource
> > might be a relatively good replacement.
>
> Hmmm. That'd be a harder sell for me. Probably would want those users to
> move to the timeofday, or alternatively, drive jiffies off of the
> timekeeping code rather then the interrupt handler to ensure it stays
> synced (something I'm plotting once the timekeeping code settles down).
It's case by case. I wouldn't say all jiffies uses could use timeofday
calls, and I wouldn't say they could all use a clocksource. I'd imagine
some could be converted to a clocksource though.
I'd be interested to see any jiffies changes you make.
> > > I do feel making the abstraction clean and generic is a good thing just
> > > for code readability (and I very much appreciate your work here!), but
> > > I'm not really sure that the need for clocksource access outside the
> > > timekeeping subsystem has been well expressed. Do you have some other
> > > examples other then sched_clock that might show further uses for this
> > > abstraction?
> >
> > I've converted latency tracing to an earlier version of the API , but I
> > don't have any other examples prepared. I think it's important to get
> > the API settled before I start converting anything else.
>
> Again, I think your patch set looks good for the most part (its just the
> last few bits I worry about). I'm very much interested to see where you
> go with this, as I feel sched_clock (on i386 atleast) needs some love
> and attention and I'm excited to see new uses for the clocksource
> abstraction. However, I do want to make sure that we think the use cases
> out to avoid over-engineering the wrong bits.
Your questions are certainly appropriate, and I appreciate the review.
There aren't too many other responses so far.
Daniel
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 09/10] -mm clocksource: initialize list value
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (7 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 08/10] -mm clocksource: cleanup on -mm dwalker
@ 2006-08-04 3:24 ` dwalker
2006-08-04 3:24 ` [PATCH 10/10] -mm clocksource: add generic sched_clock() dwalker
9 siblings, 0 replies; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: clocksouce_list_init.patch --]
[-- Type: text/plain, Size: 3808 bytes --]
This is an optional change to the clocksource structures. If the list
field is initialized it allows clocksource_register to complete faster
since it doesn't have to scan the list of clocks doing strcmp on each.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
arch/i386/kernel/hpet.c | 1 +
arch/i386/kernel/i8253.c | 1 +
arch/i386/kernel/tsc.c | 1 +
drivers/clocksource/acpi_pm.c | 1 +
drivers/clocksource/cyclone.c | 1 +
drivers/clocksource/scx200_hrt.c | 1 +
include/linux/clocksource.h | 3 +++
7 files changed, 9 insertions(+)
Index: linux-2.6.17/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.17/arch/i386/kernel/hpet.c
@@ -27,6 +27,7 @@ static struct clocksource clocksource_hp
.mult = 0, /* set below */
.shift = HPET_SHIFT,
.is_continuous = 1,
+ .list = CLOCKSOURCE_LIST_INIT(clocksource_hpet.list),
};
static int __init init_hpet_clocksource(void)
Index: linux-2.6.17/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/i8253.c
+++ linux-2.6.17/arch/i386/kernel/i8253.c
@@ -105,6 +105,7 @@ static struct clocksource clocksource_pi
.mask = CLOCKSOURCE_MASK(32),
.mult = 0,
.shift = 20,
+ .list = CLOCKSOURCE_LIST_INIT(clocksource_pit.list),
};
static int __init init_pit_clocksource(void)
Index: linux-2.6.17/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.17/arch/i386/kernel/tsc.c
@@ -282,6 +282,7 @@ static struct clocksource clocksource_ts
.mult = 0, /* to be set */
.shift = 22,
.is_continuous = 1,
+ .list = CLOCKSOURCE_LIST_INIT(clocksource_tsc.list),
};
static int tsc_update_callback(void)
Index: linux-2.6.17/drivers/clocksource/acpi_pm.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/acpi_pm.c
+++ linux-2.6.17/drivers/clocksource/acpi_pm.c
@@ -73,6 +73,7 @@ static struct clocksource clocksource_ac
.mult = 0, /*to be caluclated*/
.shift = 22,
.is_continuous = 1,
+ .list = CLOCKSOURCE_LIST_INIT(clocksource_acpi_pm.list),
};
Index: linux-2.6.17/drivers/clocksource/cyclone.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/cyclone.c
+++ linux-2.6.17/drivers/clocksource/cyclone.c
@@ -32,6 +32,7 @@ static struct clocksource clocksource_cy
.mult = 10,
.shift = 0,
.is_continuous = 1,
+ .list = CLOCKSOURCE_LIST_INIT(clocksource_cyclone.list),
};
static int __init init_cyclone_clocksource(void)
Index: linux-2.6.17/drivers/clocksource/scx200_hrt.c
===================================================================
--- linux-2.6.17.orig/drivers/clocksource/scx200_hrt.c
+++ linux-2.6.17/drivers/clocksource/scx200_hrt.c
@@ -58,6 +58,7 @@ static struct clocksource cs_hrt = {
.read = read_hrt,
.mask = CLOCKSOURCE_MASK(32),
.is_continuous = 1,
+ .list = CLOCKSOURCE_LIST_INIT(cs_hrt.list),
/* mult, shift are set based on mhz27 flag */
};
Index: linux-2.6.17/include/linux/clocksource.h
===================================================================
--- linux-2.6.17.orig/include/linux/clocksource.h
+++ linux-2.6.17/include/linux/clocksource.h
@@ -82,6 +82,9 @@ struct clocksource {
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1)
+/* Abstracted list initialization */
+#define CLOCKSOURCE_LIST_INIT(x) PLIST_NODE_INIT(x, 0)
+
/**
* clocksource_khz2mult - calculates mult from khz and shift
* @khz: Clocksource frequency in KHz
--
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 10/10] -mm clocksource: add generic sched_clock()
2006-08-04 3:24 [PATCH 00/10] -mm generic clocksoure API dwalker
` (8 preceding siblings ...)
2006-08-04 3:24 ` [PATCH 09/10] -mm clocksource: initialize list value dwalker
@ 2006-08-04 3:24 ` dwalker
9 siblings, 0 replies; 21+ messages in thread
From: dwalker @ 2006-08-04 3:24 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, johnstul
[-- Attachment #1: add_generic_sched_clock.patch --]
[-- Type: text/plain, Size: 5782 bytes --]
Adds a generic sched_clock, along with a boot time override for the
scheduler clocksource. Hopefully the config option would eventually
be removed.
Signed-Off-By: Daniel Walker <dwalker@mvista.com>
---
arch/i386/Kconfig | 4 +++
arch/i386/kernel/tsc.c | 61 ------------------------------------------------
kernel/sched.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+), 61 deletions(-)
Index: linux-2.6.17/arch/i386/Kconfig
===================================================================
--- linux-2.6.17.orig/arch/i386/Kconfig
+++ linux-2.6.17/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
bool
default y
+config GENERIC_SCHED_CLOCK
+ bool
+ default y
+
config LOCKDEP_SUPPORT
bool
default y
Index: linux-2.6.17/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.17.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.17/arch/i386/kernel/tsc.c
@@ -69,65 +69,6 @@ void mark_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-/* Accellerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * in the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!cpu_khz || check_tsc_unstable())
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-
- /* read the Time Stamp Counter: */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
static unsigned long calculate_cpu_khz(void)
{
unsigned long long start, end;
@@ -210,7 +151,6 @@ void tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
- set_cyc2ns_scale(cpu_khz);
use_tsc_delay();
}
@@ -285,7 +225,6 @@ time_cpufreq_notifier(struct notifier_bl
ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
tsc_khz = cpu_khz;
- set_cyc2ns_scale(cpu_khz);
/*
* TSC based sched_clock turns
* to junk w/ cpufreq
Index: linux-2.6.17/kernel/sched.c
===================================================================
--- linux-2.6.17.orig/kernel/sched.c
+++ linux-2.6.17/kernel/sched.c
@@ -16,6 +16,7 @@
* by Davide Libenzi, preemptible kernel bits by Robert Love.
* 2003-09-03 Interactivity tuning by Con Kolivas.
* 2004-04-02 Scheduler domains code by Nick Piggin
+ * 2006-08-03 Generic sched_clock() implementation by Daniel Walker
*/
#include <linux/mm.h>
@@ -53,6 +54,7 @@
#include <linux/acct.h>
#include <linux/kprobes.h>
#include <linux/delayacct.h>
+#include <linux/clocksource.h>
#include <asm/tlb.h>
#include <asm/unistd.h>
@@ -6843,6 +6845,66 @@ int in_sched_functions(unsigned long add
&& addr < (unsigned long)__sched_text_end);
}
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+static struct clocksource *sched_clocksource = &clocksource_jiffies;
+static char __initdata sched_clock_override[32];
+
+unsigned long long sched_clock(void)
+{
+ return cyc2ns(sched_clocksource, clocksource_read(sched_clocksource));
+}
+
+static int __init boot_override_sched_clocksource(char* str)
+{
+ if (str)
+ strlcpy(sched_clock_override, str,
+ sizeof(sched_clock_override));
+
+ return 1;
+}
+__setup("sched_clocksource=", boot_override_sched_clocksource);
+
+static int
+sched_clock_callback(struct notifier_block *nb, unsigned long op, void *c)
+{
+ /*
+ * If our clock just became unstable switch to the safe,
+ * slow, fast jiffies clock.
+ *
+ * XXX : We could just switch to the next best clock.
+ */
+ if (op == CLOCKSOURCE_NOTIFY_RATING && sched_clocksource == c)
+ sched_clocksource = &clocksource_jiffies;
+ return 0;
+}
+
+static struct notifier_block sched_clock_nb = {
+ .notifier_call = sched_clock_callback,
+};
+
+static int __init sched_clock_init(void)
+{
+ clocksource_notifier_register(&sched_clock_nb);
+
+ if (*sched_clock_override != 0) {
+ sched_clocksource = clocksource_get_clock(sched_clock_override);
+ if (unlikely(sched_clocksource == NULL)) {
+ sched_clocksource = clocksource_get_best_clock();
+ printk(KERN_ERR "Warning: "
+ "Invalid scheduler clock override.\n");
+ return 1;
+ }
+
+ printk(KERN_INFO "Scheduler: %s clocksource has been "
+ "installed.\n", sched_clocksource->name);
+ } else
+ sched_clocksource = clocksource_get_best_clock();
+
+ return 0;
+}
+__initcall(sched_clock_init);
+#endif /* CONFIG_GENERIC_SCHED_CLOCK */
+
void __init sched_init(void)
{
int i, j, k;
--
^ permalink raw reply [flat|nested] 21+ messages in thread