* [PATCH 7/10] linux 2.6.18: time handling
@ 2007-03-05 11:17 Jan Beulich
2007-03-05 15:03 ` Keir Fraser
2007-03-06 11:38 ` Keir Fraser
0 siblings, 2 replies; 6+ messages in thread
From: Jan Beulich @ 2007-03-05 11:17 UTC (permalink / raw)
To: xen-devel
Remove struct timer_opts left-overs, add a Xen clocksource, and adjust
conditionals for x86-64.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Index: head-2007-02-27/arch/i386/Kconfig
===================================================================
--- head-2007-02-27.orig/arch/i386/Kconfig 2007-03-05 10:00:18.000000000 +0100
+++ head-2007-02-27/arch/i386/Kconfig 2007-02-27 16:27:37.000000000 +0100
@@ -16,7 +16,6 @@ config X86_32
config GENERIC_TIME
bool
- depends on !X86_XEN
default y
config LOCKDEP_SUPPORT
Index: head-2007-02-27/arch/i386/kernel/Makefile
===================================================================
--- head-2007-02-27.orig/arch/i386/kernel/Makefile 2007-03-05 10:00:18.000000000 +0100
+++ head-2007-02-27/arch/i386/kernel/Makefile 2007-02-27 16:27:37.000000000 +0100
@@ -93,7 +93,7 @@ include $(srctree)/scripts/Makefile.xen
obj-y += fixup.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
-n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+n-obj-xen := i8259.o reboot.o smpboot.o trampoline.o tsc.o
obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
obj-y := $(call cherrypickxen, $(obj-y))
Index: head-2007-02-27/arch/i386/kernel/time-xen.c
===================================================================
--- head-2007-02-27.orig/arch/i386/kernel/time-xen.c 2007-03-05 10:00:18.000000000 +0100
+++ head-2007-02-27/arch/i386/kernel/time-xen.c 2007-02-27 16:27:37.000000000 +0100
@@ -96,10 +96,6 @@ extern unsigned long wall_jiffies;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-extern struct init_timer_opts timer_tsc_init;
-extern struct timer_opts timer_tsc;
-#define timer_none timer_tsc
-
/* These are peridically updated in shared_info, and then copied here. */
struct shadow_time_info {
u64 tsc_timestamp; /* TSC at last update of time vals. */
@@ -238,6 +234,7 @@ static u64 get_nsec_offset(struct shadow
return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}
+#ifdef CONFIG_X86_64
static unsigned long get_usec_offset(struct shadow_time_info *shadow)
{
u64 now, delta;
@@ -245,6 +242,7 @@ static unsigned long get_usec_offset(str
delta = now - shadow->tsc_timestamp;
return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
}
+#endif
static void __update_wallclock(time_t sec, long nsec)
{
@@ -354,6 +352,8 @@ void rtc_cmos_write(unsigned char val, u
}
EXPORT_SYMBOL(rtc_cmos_write);
+#ifdef CONFIG_X86_64
+
/*
* This version of gettimeofday has microsecond resolution
* and better than microsecond precision on fast x86 machines with TSC.
@@ -482,6 +482,8 @@ int do_settimeofday(struct timespec *tv)
EXPORT_SYMBOL(do_settimeofday);
+#endif
+
static void sync_xen_wallclock(unsigned long dummy);
static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
static void sync_xen_wallclock(unsigned long dummy)
@@ -533,11 +535,15 @@ static int set_rtc_mmss(unsigned long no
return retval;
}
+#ifdef CONFIG_X86_64
/* monotonic_clock(): returns # of nanoseconds passed since time_init()
* Note: This function is required to return accurate
* time even in the absence of multiple timer ticks.
*/
unsigned long long monotonic_clock(void)
+#else
+unsigned long long sched_clock(void)
+#endif
{
int cpu = get_cpu();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
@@ -557,9 +563,9 @@ unsigned long long monotonic_clock(void)
return time;
}
+#ifdef CONFIG_X86_64
EXPORT_SYMBOL(monotonic_clock);
-#ifdef __x86_64__
unsigned long long sched_clock(void)
{
return monotonic_clock();
@@ -927,11 +933,11 @@ void __init time_init(void)
update_wallclock();
+#ifdef CONFIG_X86_64
init_cpu_khz();
printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
-#if defined(__x86_64__)
vxtime.mode = VXTIME_TSC;
vxtime.quot = (1000000L << 32) / vxtime_hz;
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
@@ -1074,6 +1080,58 @@ void local_teardown_timer(unsigned int c
}
#endif
+#ifndef CONFIG_X86_64
+
+void tsc_init(void)
+{
+ init_cpu_khz();
+ printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+
+ use_tsc_delay();
+}
+
+#include <linux/clocksource.h>
+
+void mark_tsc_unstable(void)
+{
+#ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
+ tsc_unstable = 1;
+#endif
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+static cycle_t read_tsc(void)
+{
+ cycle_t ret;
+
+ rdtscll(ret);
+
+ return ret;
+}
+
+static struct clocksource clocksource_xen = {
+ .name = "xen",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .is_continuous = 1,
+};
+
+static int __init init_xen_clocksource(void)
+{
+ clocksource_xen.mult = clocksource_khz2mult(cpu_khz,
+ clocksource_xen.shift);
+
+ return clocksource_register(&clocksource_xen);
+}
+
+module_init(init_xen_clocksource);
+
+#endif
+
/*
* /proc/sys/xen: This really belongs in another file. It can stay here for
* now however.
Index: head-2007-02-27/include/asm-i386/mach-xen/asm/processor.h
===================================================================
--- head-2007-02-27.orig/include/asm-i386/mach-xen/asm/processor.h 2007-03-05 10:00:18.000000000 +0100
+++ head-2007-02-27/include/asm-i386/mach-xen/asm/processor.h 2007-02-27 16:27:37.000000000 +0100
@@ -23,7 +23,7 @@
#include <xen/interface/physdev.h>
/* flag for disabling the tsc */
-extern int tsc_disable;
+#define tsc_disable 0
struct desc_struct {
unsigned long a,b;
Index: head-2007-02-27/include/asm-x86_64/mach-xen/asm/timer.h
===================================================================
--- head-2007-02-27.orig/include/asm-x86_64/mach-xen/asm/timer.h 2007-03-05 11:14:12.000000000 +0100
+++ head-2007-02-27/include/asm-x86_64/mach-xen/asm/timer.h 2007-03-05 11:14:28.000000000 +0100
@@ -2,39 +2,8 @@
#define _ASMi386_TIMER_H
#include <linux/init.h>
-/**
- * struct timer_ops - used to define a timer source
- *
- * @name: name of the timer.
- * @init: Probes and initializes the timer. Takes clock= override
- * string as an argument. Returns 0 on success, anything else
- * on failure.
- * @mark_offset: called by the timer interrupt.
- * @get_offset: called by gettimeofday(). Returns the number of microseconds
- * since the last timer interupt.
- * @monotonic_clock: returns the number of nanoseconds since the init of the
- * timer.
- * @delay: delays this many clock cycles.
- */
-struct timer_opts {
- char* name;
- void (*mark_offset)(void);
- unsigned long (*get_offset)(void);
- unsigned long long (*monotonic_clock)(void);
- void (*delay)(unsigned long);
- unsigned long (*read_timer)(void);
- int (*suspend)(pm_message_t state);
- int (*resume)(void);
-};
-
-struct init_timer_opts {
- int (*init)(char *override);
- struct timer_opts *opts;
-};
-
#define TICK_SIZE (tick_nsec / 1000)
-extern struct timer_opts* __init select_timer(void);
extern void clock_fallback(void);
void setup_pit_timer(void);
@@ -42,26 +11,13 @@ void setup_pit_timer(void);
extern int pit_latch_buggy;
-extern struct timer_opts *cur_timer;
extern int timer_ack;
/* list of externed timers */
-extern struct timer_opts timer_none;
-extern struct timer_opts timer_pit;
-extern struct init_timer_opts timer_pit_init;
-extern struct init_timer_opts timer_tsc_init;
-#ifdef CONFIG_X86_CYCLONE_TIMER
-extern struct init_timer_opts timer_cyclone_init;
-#endif
-
extern unsigned long calibrate_tsc(void);
extern void init_cpu_khz(void);
#ifdef CONFIG_HPET_TIMER
-extern struct init_timer_opts timer_hpet_init;
extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr);
#endif
-#ifdef CONFIG_X86_PM_TIMER
-extern struct init_timer_opts timer_pmtmr_init;
-#endif
#endif
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 7/10] linux 2.6.18: time handling
2007-03-05 11:17 [PATCH 7/10] linux 2.6.18: time handling Jan Beulich
@ 2007-03-05 15:03 ` Keir Fraser
2007-03-05 15:09 ` Jan Beulich
2007-03-06 11:38 ` Keir Fraser
1 sibling, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2007-03-05 15:03 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 5/3/07 11:17, "Jan Beulich" <jbeulich@novell.com> wrote:
> Remove struct timer_opts left-overs, add a Xen clocksource, and adjust
> conditionals for x86-64.
>
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
There doesn't seem to be much advantage to this (we're already very diverged
from native i386 time handling) and the disadvantages are a bigger diff
versus what we need for x86/64 (hence more ifdefs) and independent_wallclock
probably stops working.
This is all solvable I'm sure, particularly if clocksources are the way of
the future (e.g., is x86/64 native going to use generic_time any time soon)?
-- Keir
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 7/10] linux 2.6.18: time handling
2007-03-05 15:03 ` Keir Fraser
@ 2007-03-05 15:09 ` Jan Beulich
2007-03-05 15:20 ` Keir Fraser
0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2007-03-05 15:09 UTC (permalink / raw)
To: xen-devel, Keir Fraser
>... and independent_wallclock probably stops working.
We didn't notice any malfunction in time handling so far (which included the
released SuSE 10.2) - how would you expect such a problem to surface?
Jan
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 7/10] linux 2.6.18: time handling
2007-03-05 15:09 ` Jan Beulich
@ 2007-03-05 15:20 ` Keir Fraser
0 siblings, 0 replies; 6+ messages in thread
From: Keir Fraser @ 2007-03-05 15:20 UTC (permalink / raw)
To: Jan Beulich, xen-devel, Keir Fraser
On 5/3/07 15:09, "Jan Beulich" <jbeulich@novell.com> wrote:
>> ... and independent_wallclock probably stops working.
>
> We didn't notice any malfunction in time handling so far (which included the
> released SuSE 10.2) - how would you expect such a problem to surface?
For example, settimeofday() will not be ignored for ordinary domUs. Also
settimeofday() calls in dom0 will not immediately propagate their effect to
other domains (but should happen during the once-per-minute wallclock
propagation to Xen). Small-ish but potentially annoying issues.
-- Keir
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 7/10] linux 2.6.18: time handling
2007-03-05 11:17 [PATCH 7/10] linux 2.6.18: time handling Jan Beulich
2007-03-05 15:03 ` Keir Fraser
@ 2007-03-06 11:38 ` Keir Fraser
2007-03-06 15:50 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2007-03-06 11:38 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Jeremy Fitzhardinge
On 5/3/07 11:17, "Jan Beulich" <jbeulich@novell.com> wrote:
> Remove struct timer_opts left-overs, add a Xen clocksource, and adjust
> conditionals for x86-64.
>
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
The other issue afaics is that the clocksource simply does raw reads of the
TSC, subsequently scaled by cpu_khz. This isn't very accurate and will cause
horrible problems if some CPUs enter power-saving modes (not explicitly
supported yet, but can happen due to auto thermal throttling) or on bigger
systems where TSCs may not be driven from the same mainboard clock (and
hence will drift).
I think Jeremy Fitzhardinge has an alternative clocksource patch which iirc
is more in line with how Xen time works (should advertise a GHz frequency
clocksource, and do scaling of the TSC value according to time-record values
read from shared_info). Having thought about this some more I think
clocksource support is worth getting into our tree, but let's look at both
available patches and decide which is the better basis for further work.
Jeremy: If I'm not mistaken and you do have a patch floating around, could
you post it?
Thanks,
Keir
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 7/10] linux 2.6.18: time handling
2007-03-06 11:38 ` Keir Fraser
@ 2007-03-06 15:50 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 6+ messages in thread
From: Jeremy Fitzhardinge @ 2007-03-06 15:50 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel, Jan Beulich
[-- Attachment #1: Type: text/plain, Size: 741 bytes --]
Keir Fraser wrote:
> I think Jeremy Fitzhardinge has an alternative clocksource patch which iirc
> is more in line with how Xen time works (should advertise a GHz frequency
> clocksource, and do scaling of the TSC value according to time-record values
> read from shared_info). Having thought about this some more I think
> clocksource support is worth getting into our tree, but let's look at both
> available patches and decide which is the better basis for further work.
>
> Jeremy: If I'm not mistaken and you do have a patch floating around, could
> you post it?
>
Yes, there's a Xen clocksource in the pv_ops tree. There's no nicely
separable patch, but the mechanism is pretty simple. I've attached
arch/i386/xen/time.c
J
[-- Attachment #2: time.c --]
[-- Type: text/x-csrc, Size: 11761 bytes --]
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/clocksource.h>
#include <asm/xen/hypercall.h>
#include <asm/arch_hooks.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include "xen-ops.h"
/* Fixed-point shift used for the .mult and .shift fields of xen_clocksource. */
#define XEN_SHIFT 22
/*
 * Largest backwards step of the clock, in nanoseconds, that the timer
 * interrupt tolerates before printing a warning. Defaults to 10ms.
 */
static unsigned long permitted_clock_jitter = 10000000UL;

/*
 * Handler for the "permitted_clock_jitter=" boot parameter: parses the
 * value (base auto-detected by simple_strtoul) and stores it as the new
 * threshold. Always returns 1.
 */
static int __init __permitted_clock_jitter(char *str)
{
	unsigned long jitter_ns = simple_strtoul(str, NULL, 0);

	permitted_clock_jitter = jitter_ns;

	return 1;
}
__setup("permitted_clock_jitter=", __permitted_clock_jitter);
/*
 * Local snapshot of the time parameters supplied by Xen. These are
 * periodically updated in shared_info, and then copied here.
 */
struct shadow_time_info {
u64 tsc_timestamp; /* TSC at last update of time vals. */
u64 system_timestamp; /* Time, in nanosecs, since boot. */
u32 tsc_to_nsec_mul; /* 32-bit fraction handed to scale_delta() as mul_frac. */
int tsc_shift; /* Shift applied to a TSC delta before the multiply. */
u32 version; /* Xen's version counter as of the last copy. */
};
/* Each CPU's most recent copy of the time values read from Xen. */
static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time; /* System time (ns) at last processing. */
/* Per-CPU counterpart, advanced by the stolen and blocked time accounted on that CPU. */
static DEFINE_PER_CPU(u64, processed_system_time);
/* How much CPU time was spent blocked and how much was 'stolen'? */
static DEFINE_PER_CPU(u64, processed_stolen_time);
static DEFINE_PER_CPU(u64, processed_blocked_time);
/* Current runstate of each CPU (updated automatically by the hypervisor). */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
/* Length of one tick in nanoseconds. */
/* Must be signed, as it's compared with s64 quantities which can be -ve. */
#define NS_PER_TICK (1000000000LL/HZ)
/*
 * Derive a CPU frequency in kHz from the TSC scaling parameters that Xen
 * publishes for VCPU 0: (1000000 << 32) / tsc_to_system_mul, then shifted
 * right by tsc_shift (or left by -tsc_shift when tsc_shift is negative).
 */
unsigned long xen_cpu_khz(void)
{
	const struct vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;
	u64 khz = 1000000ULL << 32;

	do_div(khz, info->tsc_to_system_mul);

	if (info->tsc_shift >= 0)
		khz >>= info->tsc_shift;
	else
		khz <<= -info->tsc_shift;

	return khz;
}
/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 *
 * The source is the time record of the VCPU found through the PDA; the
 * destination is this CPU's shadow_time. The copy is repeated until the
 * version sampled before the copy equals the version seen after it and the
 * low bit of that version is clear.
 *
 * NOTE(review): src is looked up before get_cpu_var() is called; this
 * presumably relies on callers not being able to migrate CPUs in between --
 * confirm against all callers.
 */
static void get_time_values_from_xen(void)
{
struct vcpu_time_info *src;
struct shadow_time_info *dst;
src = &read_pda(xen.vcpu)->time;
dst = &get_cpu_var(shadow_time);
do {
dst->version = src->version;
/* Sample the version before reading the payload fields. */
rmb();
dst->tsc_timestamp = src->tsc_timestamp;
dst->system_timestamp = src->system_time;
dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
dst->tsc_shift = src->tsc_shift;
/* Finish reading the payload before re-checking the version. */
rmb();
/* Retry if the version's low bit is set or the version changed meanwhile. */
} while ((src->version & 1) | (dst->version ^ src->version));
put_cpu_var(shadow_time);
}
/*
 * Returns non-zero when the version stored in this CPU's shadow_time still
 * equals the version currently shown in the VCPU's time record.
 */
static inline int time_values_up_to_date(void)
{
	struct vcpu_time_info *xen_time = &read_pda(xen.vcpu)->time;
	unsigned shadow_version = get_cpu_var(shadow_time).version;

	put_cpu_var(shadow_time);

	/* Read the shadow version before looking at Xen's current one. */
	rmb();

	return shadow_version == xen_time->version;
}
/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 *
 * @delta:    value to scale.
 * @mul_frac: multiplier, used as a 32.32 fixed-point fraction (the product
 *            is shifted right by 32 bits).
 * @shift:    pre-shift applied to delta: left by @shift when non-negative,
 *            right by -@shift when negative.
 *
 * Returns ((delta shifted by @shift) * mul_frac) >> 32.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
u64 product;
#ifdef __i386__
u32 tmp1, tmp2;
#endif
/* Apply the pre-shift before the multiply. */
if (shift < 0)
delta >>= -shift;
else
delta <<= shift;
/*
 * i386: two 32x32->64 multiplies. The high word of (low half * mul_frac)
 * is added to (high half * mul_frac), which yields the full product
 * shifted right by 32 in edx:eax.
 */
#ifdef __i386__
__asm__ (
"mul %5 ; "
"mov %4,%%eax ; "
"mov %%edx,%4 ; "
"mul %5 ; "
"xor %5,%5 ; "
"add %4,%%eax ; "
"adc %5,%%edx ; "
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
/* x86-64: one 64x64->128 multiply, then shift the 128-bit result right by 32. */
#elif __x86_64__
__asm__ (
"mul %%rdx ; shrd $32,%%rdx,%%rax"
: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#else
#error implement me!
#endif
return product;
}
/*
 * Nanoseconds elapsed since the snapshot in @shadow was taken: the TSC
 * distance from shadow->tsc_timestamp, scaled with the snapshot's
 * multiplier and shift.
 */
static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
	u64 tsc_now;

	rdtscll(tsc_now);

	return scale_delta(tsc_now - shadow->tsc_timestamp,
			   shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}
static void xen_timer_interrupt_hook(void)
{
s64 delta, delta_cpu, stolen, blocked;
u64 sched_time;
int i, cpu = smp_processor_id();
unsigned long ticks;
struct shadow_time_info *shadow = &__get_cpu_var(shadow_time);
struct vcpu_runstate_info *runstate = &__get_cpu_var(runstate);
do {
get_time_values_from_xen();
/* Obtain a consistent snapshot of elapsed wallclock cycles. */
delta = delta_cpu =
shadow->system_timestamp + get_nsec_offset(shadow);
if (0)
printk("tsc_timestamp=%llu system_timestamp=%llu tsc_to_nsec=%u tsc_shift=%d, version=%u, delta=%lld processed_system_time=%lld\n",
shadow->tsc_timestamp, shadow->system_timestamp,
shadow->tsc_to_nsec_mul, shadow->tsc_shift,
shadow->version, delta, processed_system_time);
delta -= processed_system_time;
delta_cpu -= __get_cpu_var(processed_system_time);
/*
* Obtain a consistent snapshot of stolen/blocked cycles. We
* can use state_entry_time to detect if we get preempted here.
*/
do {
sched_time = runstate->state_entry_time;
barrier();
stolen = runstate->time[RUNSTATE_runnable] +
runstate->time[RUNSTATE_offline] -
__get_cpu_var(processed_stolen_time);
blocked = runstate->time[RUNSTATE_blocked] -
__get_cpu_var(processed_blocked_time);
barrier();
} while (sched_time != runstate->state_entry_time);
} while (!time_values_up_to_date());
if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
unlikely(delta_cpu < -(s64)permitted_clock_jitter))
&& printk_ratelimit()) {
printk("Timer ISR/%d: Time went backwards: "
"delta=%lld delta_cpu=%lld shadow=%lld "
"off=%lld processed=%lld cpu_processed=%lld\n",
cpu, delta, delta_cpu, shadow->system_timestamp,
(s64)get_nsec_offset(shadow),
processed_system_time,
__get_cpu_var(processed_system_time));
for (i = 0; i < num_online_cpus(); i++)
printk(" %d: %lld\n", i,
per_cpu(processed_system_time, i));
}
/* System-wide jiffy work. */
ticks = 0;
while(delta > NS_PER_TICK) {
delta -= NS_PER_TICK;
processed_system_time += NS_PER_TICK;
ticks++;
}
do_timer(ticks);
/*
* Account stolen ticks.
* HACK: Passing NULL to account_steal_time()
* ensures that the ticks are accounted as stolen.
*/
if ((stolen > 0) && (delta_cpu > 0)) {
delta_cpu -= stolen;
if (unlikely(delta_cpu < 0))
stolen += delta_cpu; /* clamp local-time progress */
do_div(stolen, NS_PER_TICK);
__get_cpu_var(processed_stolen_time) += stolen * NS_PER_TICK;
__get_cpu_var(processed_system_time) += stolen * NS_PER_TICK;
account_steal_time(NULL, (cputime_t)stolen);
}
/*
* Account blocked ticks.
* HACK: Passing idle_task to account_steal_time()
* ensures that the ticks are accounted as idle/wait.
*/
if ((blocked > 0) && (delta_cpu > 0)) {
delta_cpu -= blocked;
if (unlikely(delta_cpu < 0))
blocked += delta_cpu; /* clamp local-time progress */
do_div(blocked, NS_PER_TICK);
__get_cpu_var(processed_blocked_time) += blocked * NS_PER_TICK;
__get_cpu_var(processed_system_time) += blocked * NS_PER_TICK;
account_steal_time(idle_task(cpu), (cputime_t)blocked);
}
update_process_times(user_mode_vm(get_irq_regs()));
}
/*
 * Clocksource read callback: refreshes this CPU's shadow time values from
 * Xen and returns the snapshot's system timestamp plus the nanoseconds
 * elapsed since the snapshot.
 */
static cycle_t xen_clocksource_read(void)
{
	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
	cycle_t now_ns;

	get_time_values_from_xen();

	now_ns = shadow->system_timestamp + get_nsec_offset(shadow);

	put_cpu_var(shadow_time);

	return now_ns;
}
static void xen_read_wallclock(struct timespec *ts)
{
const struct shared_info *s = HYPERVISOR_shared_info;
u32 version;
u64 delta;
struct timespec now;
/* get wallclock at system boot */
do {
version = s->wc_version;
rmb();
now.tv_sec = s->wc_sec;
now.tv_nsec = s->wc_nsec;
rmb();
} while ((s->wc_version & 1) | (version ^ s->wc_version));
delta = xen_clocksource_read(); /* time since system boot */
delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
now.tv_nsec = do_div(delta, NSEC_PER_SEC);
now.tv_sec = delta;
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
unsigned long xen_get_wallclock(void)
{
struct timespec ts;
xen_read_wallclock(&ts);
return ts.tv_sec;
}
/*
 * Setting the wallclock is not done from a domU: @now is ignored and -1 is
 * always returned.
 */
int xen_set_wallclock(unsigned long now)
{
	const int not_supported = -1;

	return not_supported;
}
/*
 * Clocksource read through xen_clocksource_read(). mult is 1 << XEN_SHIFT
 * and shift is XEN_SHIFT, so the two scaling factors cancel out.
 */
static struct clocksource xen_clocksource = {
.name = "xen",
.rating = 400,
.read = xen_clocksource_read,
.mask = ~0, /* all bits set */
.mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
.shift = XEN_SHIFT,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static void init_missing_ticks_accounting(int cpu)
{
struct vcpu_register_runstate_memory_area area;
struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
memset(runstate, 0, sizeof(*runstate));
area.addr.v = runstate;
HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
per_cpu(processed_blocked_time, cpu) =
runstate->time[RUNSTATE_blocked];
per_cpu(processed_stolen_time, cpu) =
runstate->time[RUNSTATE_runnable] +
runstate->time[RUNSTATE_offline];
}
/*
 * Timer interrupt handler.
 *
 * Interrupts are disabled locally while this runs, but timer code may be
 * running on another CPU at the same time, so the tick processing is done
 * under the xtime_lock write lock. The non-irq variant of the lock is
 * sufficient because local interrupts are already off.
 *
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	write_seqlock(&xtime_lock);

	xen_timer_interrupt_hook();

	write_sequnlock(&xtime_lock);

	return IRQ_HANDLED;
}
/* Bind VIRQ_TIMER on CPU 0 to xen_timer_interrupt() under the name "timer0". */
static void setup_cpu0_timer_irq(void)
{
	printk(KERN_DEBUG "installing Xen timer for CPU 0\n");

	bind_virq_to_irqhandler(VIRQ_TIMER, 0, xen_timer_interrupt,
				SA_INTERRUPT, "timer0", NULL);
}
__init void xen_time_init(void)
{
get_time_values_from_xen();
processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
init_missing_ticks_accounting(0);
clocksource_register(&xen_clocksource);
/* Set initial system time with full resolution */
xen_read_wallclock(&xtime);
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
tsc_disable = 0;
setup_cpu0_timer_irq();
}
/*
 * Convert the jiffies value @j to a system time in nanoseconds, relative to
 * processed_system_time. Returns 0 for very distant values of @j. The
 * computation is repeated until it completes without xtime_lock changing.
 */
static u64 jiffies_to_st(unsigned long j)
{
	unsigned long seq;
	long ticks_ahead;
	u64 deadline;

	do {
		seq = read_seqbegin(&xtime_lock);
		ticks_ahead = j - jiffies;

		if (ticks_ahead < 1) {
			/*
			 * Triggers in some wrap-around cases, but that's
			 * okay: we just end up with a shorter timeout.
			 */
			deadline = processed_system_time + NS_PER_TICK;
		} else if (((unsigned long)ticks_ahead >> (BITS_PER_LONG-3)) != 0) {
			/*
			 * Very long timeout means there is no pending timer.
			 * We indicate this to Xen by passing zero timeout.
			 */
			deadline = 0;
		} else {
			deadline = processed_system_time +
				ticks_ahead * (u64)NS_PER_TICK;
		}
	} while (read_seqretry(&xtime_lock, seq));

	return deadline;
}
/*
 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
 * These functions are based on implementations from arch/s390/kernel/time.c
 *
 * stop_hz_timer() marks this CPU in nohz_cpu_mask and programs the Xen
 * timer for the next timer event, or for the next jiffy when the CPU has
 * to stay in tick mode.
 */
void stop_hz_timer(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned long j;
	int keep_ticking;

	cpu_set(cpu, nohz_cpu_mask);

	/*
	 * See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs
	 * ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a
	 * value of rcp->cur that matches rdp->quiescbatch and allows us to
	 * stop the hz timer then the cpumasks created for subsequent values
	 * of cur in rcu_start_batch are guaranteed to pick up the updated
	 * nohz_cpu_mask and so will not depend on this cpu.
	 */
	smp_mb();

	/* Leave ourselves in tick mode if rcu or softirq or timer pending. */
	keep_ticking = rcu_needs_cpu(cpu) || local_softirq_pending();
	if (!keep_ticking) {
		/* Only consult the timer wheel when nothing else is pending. */
		j = next_timer_interrupt();
		keep_ticking = time_before_eq(j, jiffies);
	}

	if (keep_ticking) {
		cpu_clear(cpu, nohz_cpu_mask);
		j = jiffies + 1;
	}

	if (HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0)
		BUG();
}
/* Leave tickless mode: remove the current CPU from nohz_cpu_mask. */
void start_hz_timer(void)
{
	unsigned int cpu = smp_processor_id();

	cpu_clear(cpu, nohz_cpu_mask);
}
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2007-03-06 15:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2007-03-05 11:17 [PATCH 7/10] linux 2.6.18: time handling Jan Beulich
2007-03-05 15:03 ` Keir Fraser
2007-03-05 15:09 ` Jan Beulich
2007-03-05 15:20 ` Keir Fraser
2007-03-06 11:38 ` Keir Fraser
2007-03-06 15:50 ` Jeremy Fitzhardinge
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.