All of lore.kernel.org
 help / color / mirror / Atom feed
* RE: [PATCH] per-cpu timer changes
@ 2005-05-24  1:20 Ian Pratt
  2005-05-26 17:34 ` Don Fry
  0 siblings, 1 reply; 8+ messages in thread
From: Ian Pratt @ 2005-05-24  1:20 UTC (permalink / raw)
  To: Don Fry, Xen-devel

> Here are some patches to help xen run on a system where the 
> CPUs are not synchronized so that the tsc counters drift.  
> Without this patch I get lots of 'time went backwards' 
> messages in the linux logs.  I still get an occasional 
> message, but not the number I did previously.

Don,

This is looking good. To help other people review the patch, it might be
a good idea to post some of the design discussion we had off list as I
think the approach will be new to most people. (Perhaps put some of the
text in a comment in the hypervisor interface).

As regards the 'time went backwards' messages, if you're seeing small negative
deltas, I'm not surprised -- you need to round to some precision as we
won't be nanosecond accurate. Experience suggests we'll be good to within a
few tens of ns with any kind of decent crystal. We could round to e.g.
512ns or 1024ns to make sure.

Best,
Ian

^ permalink raw reply	[flat|nested] 8+ messages in thread
* [PATCH] per-cpu timer changes
@ 2005-06-07 21:16 Don Fry
  0 siblings, 0 replies; 8+ messages in thread
From: Don Fry @ 2005-06-07 21:16 UTC (permalink / raw)
  To: Xen-devel

These patches implement per-cpu relative time.  They have been tested on
32-bit x86 platforms but not even compiled on x86_64 or ia64.

They solve the 'time went backwards' problems on systems with
unsynchronized cpus.  Using last week's xeno-unstable I had no problems
with these patches.  Using today's bits, the system will sometimes panic
when booting dom0 between 'PCI: IRQ init' and 'Grant table initialized'.

I would like to get wider testing of these changes.

Signed-off-by:  Don Fry <brazilnut@us.ibm.com>


--- xeno-unstable.bk/xen/include/public/xen.h.orig	2005-06-07 09:50:08.000000000 -0700
+++ xeno-unstable.bk/xen/include/public/xen.h	2005-06-07 09:50:08.000000000 -0700
@@ -329,6 +329,21 @@ typedef struct
 } PACKED vcpu_info_t;                   /* 8 + arch */
 
 /*
+ * Xen/kernel shared data
+ *  per cpu timing information.
+ */
+typedef struct time_info_st
+{
+    u32			time_version1;
+    u32			time_version2;
+    tsc_timestamp_t	tsc_timestamp;	/* TSC at last update */
+    u64			system_time;	/* time, in nanoseconds, since boot */
+    u64			cpu_freq;	/* CPU frequency (Hz) */
+    u32			wc_sec;		/* Secs  00:00:00 UTC,  Jan 1, 1970. */
+    u32			wc_usec;	/* Usecs 00:00:00 UTC,  Jan 1, 1970. */
+} PACKED time_info_t;
+
+/*
  * Xen/kernel shared data -- pointer provided in start_info.
  * NB. We expect that this struct is smaller than a page.
  */
@@ -403,6 +418,7 @@ typedef struct shared_info_st
 
     arch_shared_info_t arch;
 
+    time_info_t vcpu_time[MAX_VIRT_CPUS];
 } PACKED shared_info_t;
 
 /*
--- xeno-unstable.bk/xen/common/schedule.c.orig	2005-06-07 10:40:50.000000000 -0700
+++ xeno-unstable.bk/xen/common/schedule.c	2005-06-07 11:23:57.000000000 -0700
@@ -53,6 +53,7 @@ string_param("sched", opt_sched);
 /* Various timer handlers. */
 static void s_timer_fn(void *unused);
 static void t_timer_fn(void *unused);
+static void tsc_timer_fn(void *unused);
 static void dom_timer_fn(void *data);
 
 /* This is global for now so that private implementations can reach it */
@@ -76,6 +77,7 @@ static struct scheduler ops;
 
 /* Per-CPU periodic timer sends an event to the currently-executing domain. */
 static struct ac_timer t_timer[NR_CPUS]; 
+static struct ac_timer tsc_timer[NR_CPUS]; 
 
 void free_domain_struct(struct domain *d)
 {
@@ -487,6 +489,7 @@ int idle_cpu(int cpu)
  * Timers: the scheduler utilises a number of timers
  * - s_timer: per CPU timer for preemption and scheduling decisions
  * - t_timer: per CPU periodic timer to send timer interrupt to current dom
+ * - tsc_timer: per CPU periodic timer to update time bases
  * - dom_timer: per domain timer to specifiy timeout values
  ****************************************************************************/
 
@@ -516,6 +519,17 @@ static void t_timer_fn(void *unused)
     set_ac_timer(&t_timer[cpu], NOW() + MILLISECS(10));
 }
 
+/* Periodic tick timer: update time bases for per-cpu timing. */
+static void tsc_timer_fn(void *unused)
+{
+    unsigned int        cpu = current->processor;
+
+    extern void percpu_ticks(void);
+    percpu_ticks();
+
+    set_ac_timer(&tsc_timer[cpu], NOW() + MILLISECS(250));
+}
+
 /* Domain timer function, sends a virtual timer interrupt to domain */
 static void dom_timer_fn(void *data)
 {
@@ -537,6 +551,7 @@ void __init scheduler_init(void)
         spin_lock_init(&schedule_data[i].schedule_lock);
         init_ac_timer(&schedule_data[i].s_timer, s_timer_fn, NULL, i);
         init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
+        init_ac_timer(&tsc_timer[i], tsc_timer_fn, NULL, i);
     }
 
     schedule_data[0].curr = idle_task[0];
@@ -566,6 +581,9 @@ void schedulers_start(void) 
 {   
     t_timer_fn(0);
     smp_call_function((void *)t_timer_fn, NULL, 1, 1);
+
+    tsc_timer_fn(0);
+    smp_call_function((void *)tsc_timer_fn, NULL, 1, 1);
 }
 
 void dump_runq(unsigned char key)


--- xeno-unstable.bk/xen/arch/x86/smpboot.c.orig	2005-06-07 10:41:14.000000000 -0700
+++ xeno-unstable.bk/xen/arch/x86/smpboot.c	2005-06-07 11:23:51.000000000 -0700
@@ -431,6 +431,7 @@ void __init start_secondary(void *unused
 
 	extern void percpu_traps_init(void);
 	extern void cpu_init(void);
+	extern void setup_percpu_time(void);
 
 	set_current(idle_task[cpu]);
 	set_processor_id(cpu);
@@ -452,6 +453,7 @@ void __init start_secondary(void *unused
 
 	setup_secondary_APIC_clock();
 	enable_APIC_timer();
+	setup_percpu_time();
 
 	/*
 	 * low-memory mappings have been cleared, flush them from


--- xeno-unstable.bk/xen/arch/x86/time.c.orig	2005-06-07 10:40:57.000000000 -0700
+++ xeno-unstable.bk/xen/arch/x86/time.c	2005-06-07 11:40:22.000000000 -0700
@@ -49,6 +49,29 @@ static u64             full_tsc_irq;    
 static s_time_t        stime_irq;       /* System time at last 'time update' */
 static unsigned long   wc_sec, wc_usec; /* UTC time at last 'time update'.   */
 static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
+static time_info_t     percpu_time_info[NR_CPUS];
+
+void percpu_ticks(void)	/* advance this CPU's time base from its own TSC */
+{
+    int cpu = smp_processor_id();
+    time_info_t *t = &percpu_time_info[cpu];
+    u64 tsc, delta;
+    u64 quarter = t->cpu_freq >> 2;	/* TSC ticks per quarter second */
+    if (quarter == 0) return;	/* cpu_freq < 4: loop below would never end */
+    rdtscll(tsc);
+    delta = tsc - t->tsc_timestamp;
+    while (delta >= quarter) {	/* fold in each whole quarter second */
+	t->wc_usec += 1000000UL / 4;
+	t->system_time += 1000000000ULL / 4;
+	t->tsc_timestamp += quarter;
+	delta -= quarter;
+    }
+    /* Carry whole seconds out of wc_usec (1 s == 1000000 us). */
+    while (t->wc_usec >= 1000000UL) {
+	t->wc_sec += 1;
+	t->wc_usec -= 1000000UL;
+    }
+}
 
 void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
 {
@@ -277,20 +300,29 @@ static inline void __update_dom_time(str
 {
     struct domain *d  = v->domain;
     shared_info_t *si = d->shared_info;
+    time_info_t *dom = &si->vcpu_time[v->processor];
+    time_info_t *xen = &percpu_time_info[smp_processor_id()];
 
     spin_lock(&d->time_lock);
 
     si->time_version1++;
+    dom->time_version1++;
     wmb();
 
     si->cpu_freq       = cpu_freq;
+    dom->cpu_freq      = xen->cpu_freq;
     si->tsc_timestamp  = full_tsc_irq;
+    dom->tsc_timestamp = xen->tsc_timestamp;
     si->system_time    = stime_irq;
+    dom->system_time   = xen->system_time;
     si->wc_sec         = wc_sec;
+    dom->wc_sec        = xen->wc_sec;
     si->wc_usec        = wc_usec;
+    dom->wc_usec       = xen->wc_usec;
 
     wmb();
     si->time_version2++;
+    dom->time_version2++;
 
     spin_unlock(&d->time_lock);
 }
@@ -298,8 +330,11 @@ static inline void __update_dom_time(str
 void update_dom_time(struct vcpu *v)
 {
     unsigned long flags;
+    int cpu = smp_processor_id();
 
-    if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
+    if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq 
+    ||   v->domain->shared_info->vcpu_time[v->processor].tsc_timestamp !=
+	percpu_time_info[cpu].tsc_timestamp)
     {
         read_lock_irqsave(&time_lock, flags);
         __update_dom_time(v);
@@ -312,6 +347,7 @@ void do_settime(unsigned long secs, unsi
 {
     s64 delta;
     long _usecs = (long)usecs;
+    int i;
 
     write_lock_irq(&time_lock);
 
@@ -326,6 +362,10 @@ void do_settime(unsigned long secs, unsi
 
     wc_sec  = secs;
     wc_usec = _usecs;
+    for (i=0; i<NR_CPUS; i++) {
+	percpu_time_info[i].wc_sec = wc_sec;
+	percpu_time_info[i].wc_usec = wc_usec;
+    }
 
     /* Others will pick up the change at the next tick. */
     __update_dom_time(current);
@@ -335,16 +375,39 @@ void do_settime(unsigned long secs, unsi
 }
 
 
+spinlock_t tsc_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Time setup for this processor.
+ */
+void __init setup_percpu_time(void)
+{
+    unsigned long flags;
+    unsigned long ticks_per_frac;
+    int cpu = smp_processor_id();
+
+    /* only have 1 cpu calibrate at a time */
+    spin_lock_irqsave(&tsc_lock, flags);
+    ticks_per_frac = calibrate_tsc();
+    spin_unlock_irqrestore(&tsc_lock, flags);
+
+    if (!ticks_per_frac)
+	panic("Error calibrating TSC\n");
+    percpu_time_info[cpu].cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
+    rdtscll(percpu_time_info[cpu].tsc_timestamp);
+    percpu_time_info[cpu].system_time = stime_irq;
+}
+
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time()
 {
     u64      scale;
     unsigned int cpu_ghz;
+    int i;
 
     cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
     for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
         continue;
-
     scale  = 1000000000LL << (32 + rdtsc_bitshift);
     scale /= cpu_freq;
     st_scale_f = scale & 0xffffffff;
@@ -357,6 +420,12 @@ int __init init_xen_time()
 
     /* Wallclock time starts as the initial RTC time. */
     wc_sec  = get_cmos_time();
+    for (i=0; i<NR_CPUS; i++) {
+	percpu_time_info[i].wc_sec = wc_sec;
+	percpu_time_info[i].wc_usec = 0;
+	percpu_time_info[i].system_time = stime_irq;
+	percpu_time_info[i].cpu_freq = cpu_freq;	// default speed
+    }
 
     printk("Time init:\n");
     printk(".... cpu_freq:    %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);


--- xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c.orig	2005-06-07 10:21:21.000000000 -0700
+++ xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c	2005-06-07 13:06:01.000000000 -0700
@@ -105,9 +105,13 @@ struct timer_opts *cur_timer = &timer_ts
 
 /* These are peridically updated in shared_info, and then copied here. */
 u32 shadow_tsc_stamp;
+DEFINE_PER_CPU(u64, shadow_tsc_stamp);
 u64 shadow_system_time;
+DEFINE_PER_CPU(u64, shadow_system_time);
 static u32 shadow_time_version;
+DEFINE_PER_CPU(u32, shadow_time_version);
 static struct timeval shadow_tv;
+static DEFINE_PER_CPU(struct timeval, shadow_tv);
 
 /*
  * We use this to ensure that gettimeofday() is monotonically increasing. We
@@ -171,23 +175,29 @@ __setup("independent_wallclock", __indep
 static void __get_time_values_from_xen(void)
 {
 	shared_info_t *s = HYPERVISOR_shared_info;
+	int cpu = smp_processor_id();
 
 	do {
 		shadow_time_version = s->time_version2;
+		per_cpu(shadow_time_version, cpu) = s->vcpu_time[cpu].time_version2;
 		rmb();
 		shadow_tv.tv_sec    = s->wc_sec;
 		shadow_tv.tv_usec   = s->wc_usec;
 		shadow_tsc_stamp    = (u32)s->tsc_timestamp;
 		shadow_system_time  = s->system_time;
+		per_cpu(shadow_tv.tv_sec, cpu)	= s->vcpu_time[cpu].wc_sec;
+		per_cpu(shadow_tv.tv_usec, cpu)	= s->vcpu_time[cpu].wc_usec;
+		per_cpu(shadow_tsc_stamp, cpu)	= s->vcpu_time[cpu].tsc_timestamp;
+		per_cpu(shadow_system_time, cpu) = s->vcpu_time[cpu].system_time;
 		rmb();
 	}
-	while (shadow_time_version != s->time_version1);
+	while (shadow_time_version != s->time_version1 ||  per_cpu(shadow_time_version, cpu) != s->vcpu_time[cpu].time_version1);
 
 	cur_timer->mark_offset();
 }
 
 #define TIME_VALUES_UP_TO_DATE \
- ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
+ ({ rmb(); ((per_cpu(shadow_time_version, cpu) == HYPERVISOR_shared_info->vcpu_time[cpu].time_version2) && (shadow_time_version == HYPERVISOR_shared_info->time_version2)); })
 
 /*
  * This version of gettimeofday has microsecond resolution
@@ -200,6 +210,7 @@ void do_gettimeofday(struct timeval *tv)
 	unsigned long max_ntp_tick;
 	unsigned long flags;
 	s64 nsec;
+	int cpu = smp_processor_id();
 
 	do {
 		unsigned long lost;
@@ -227,7 +238,7 @@ void do_gettimeofday(struct timeval *tv)
 		sec = xtime.tv_sec;
 		usec += (xtime.tv_nsec / NSEC_PER_USEC);
 
-		nsec = shadow_system_time - processed_system_time;
+		nsec = per_cpu(shadow_system_time, cpu) - per_cpu(processed_system_time, cpu);
 		__normalize_time(&sec, &nsec);
 		usec += (long)nsec / NSEC_PER_USEC;
 
@@ -273,6 +284,7 @@ int do_settimeofday(struct timespec *tv)
 	long wtm_nsec;
 	s64 nsec;
 	struct timespec xentime;
+	int cpu = smp_processor_id();
 
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
@@ -306,7 +318,7 @@ int do_settimeofday(struct timespec *tv)
 	 */
 	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
 
-	nsec -= (shadow_system_time - processed_system_time);
+	nsec -= (per_cpu(shadow_system_time, cpu) - per_cpu(processed_system_time, cpu));
 
 	__normalize_time(&sec, &nsec);
 	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -384,6 +396,7 @@ unsigned long profile_pc(struct pt_regs 
 EXPORT_SYMBOL(profile_pc);
 #endif
 
+extern unsigned long long get_full_tsc_offset(void);
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
@@ -392,27 +405,25 @@ static inline void do_timer_interrupt(in
 					struct pt_regs *regs)
 {
 	time_t wtm_sec, sec;
-	s64 delta, delta_cpu, nsec;
+	s64 delta_cpu, nsec;
 	long sec_diff, wtm_nsec;
 	int cpu = smp_processor_id();
 
 	do {
 		__get_time_values_from_xen();
 
-		delta = delta_cpu = (s64)shadow_system_time +
-			((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
-		delta     -= processed_system_time;
-		delta_cpu -= per_cpu(processed_system_time, cpu);
+		delta_cpu = (s64)per_cpu(shadow_system_time, cpu) +
+			((s64)get_full_tsc_offset())
+			- per_cpu(processed_system_time, cpu);
 	}
 	while (!TIME_VALUES_UP_TO_DATE);
 
-	if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
+	if (unlikely(delta_cpu < 0)) {
 		printk("Timer ISR/%d: Time went backwards: "
-		       "delta=%lld cpu_delta=%lld shadow=%lld "
-		       "off=%lld processed=%lld cpu_processed=%lld\n",
-		       cpu, delta, delta_cpu, shadow_system_time,
-		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
-		       processed_system_time,
+		       "cpu_delta=%lld cpu_shadow=%lld "
+		       "off=%lld cpu_processed=%lld\n",
+		       cpu, delta_cpu, per_cpu(shadow_system_time, cpu),
+		       (s64)get_full_tsc_offset(), 
 		       per_cpu(processed_system_time, cpu));
 		for (cpu = 0; cpu < num_online_cpus(); cpu++)
 			printk(" %d: %lld\n", cpu,
@@ -420,19 +431,15 @@ static inline void do_timer_interrupt(in
 		return;
 	}
 
-	/* System-wide jiffy work. */
-	while (delta >= NS_PER_TICK) {
-		delta -= NS_PER_TICK;
-		processed_system_time += NS_PER_TICK;
-		do_timer(regs);
-	}
-
 	/* Local CPU jiffy work. */
 	while (delta_cpu >= NS_PER_TICK) {
 		delta_cpu -= NS_PER_TICK;
 		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
 		update_process_times(user_mode(regs));
 		profile_tick(CPU_PROFILING, regs);
+		/* System-wide jiffy work. */
+		if (cpu == 0)
+			do_timer(regs);
 	}
 
 	if (cpu != 0)
@@ -447,19 +454,19 @@ static inline void do_timer_interrupt(in
 	    ((time_status & STA_UNSYNC) != 0) &&
 	    (xtime.tv_sec > (last_update_from_xen + 60))) {
 		/* Adjust shadow for jiffies that haven't updated xtime yet. */
-		shadow_tv.tv_usec -= 
+		per_cpu(shadow_tv.tv_usec, cpu) -= 
 			(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
-		HANDLE_USEC_UNDERFLOW(shadow_tv);
+		HANDLE_USEC_UNDERFLOW(per_cpu(shadow_tv, cpu));
 
 		/*
 		 * Reset our running time counts if they are invalidated by
 		 * a warp backwards of more than 500ms.
 		 */
-		sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
+		sec_diff = xtime.tv_sec - per_cpu(shadow_tv.tv_sec, cpu);
 		if (unlikely(abs(sec_diff) > 1) ||
 		    unlikely(((sec_diff * USEC_PER_SEC) +
 			      (xtime.tv_nsec / NSEC_PER_USEC) -
-			      shadow_tv.tv_usec) > 500000)) {
+			      per_cpu(shadow_tv.tv_usec, cpu)) > 500000)) {
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
 			last_rtc_update = last_update_to_xen = 0;
 #endif
@@ -467,8 +474,8 @@ static inline void do_timer_interrupt(in
 		}
 
 		/* Update our unsynchronised xtime appropriately. */
-		sec = shadow_tv.tv_sec;
-		nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
+		sec = per_cpu(shadow_tv.tv_sec, cpu);
+		nsec = per_cpu(shadow_tv.tv_usec, cpu) * NSEC_PER_USEC;
 
 		__normalize_time(&sec, &nsec);
 		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -498,7 +505,7 @@ static inline void do_timer_interrupt(in
 		op.cmd = DOM0_SETTIME;
 		op.u.settime.secs        = tv.tv_sec;
 		op.u.settime.usecs       = tv.tv_usec;
-		op.u.settime.system_time = shadow_system_time;
+		op.u.settime.system_time = per_cpu(shadow_system_time, cpu);
 		HYPERVISOR_dom0_op(&op);
 
 		last_update_to_xen = xtime.tv_sec;
@@ -670,7 +677,7 @@ void __init time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 		-xtime.tv_sec, -xtime.tv_nsec);
 	processed_system_time = shadow_system_time;
-	per_cpu(processed_system_time, 0) = processed_system_time;
+	per_cpu(processed_system_time, 0) = per_cpu(shadow_system_time, 0);
 
 	if (timer_tsc_init.init(NULL) != 0)
 		BUG();
@@ -753,7 +760,7 @@ void time_resume(void)
 
 	/* Reset our own concept of passage of system time. */
 	processed_system_time = shadow_system_time;
-	per_cpu(processed_system_time, 0) = processed_system_time;
+	per_cpu(processed_system_time, 0) = per_cpu(shadow_system_time, 0);
 
 	/* Accept a warp in UTC (wall-clock) time. */
 	last_seen_tv.tv_sec = 0;
@@ -770,7 +777,7 @@ void local_setup_timer(void)
 
 	do {
 		seq = read_seqbegin(&xtime_lock);
-		per_cpu(processed_system_time, cpu) = shadow_system_time;
+		per_cpu(processed_system_time, cpu) = per_cpu(shadow_system_time, cpu);
 	} while (read_seqretry(&xtime_lock, seq));
 
 	per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);


--- xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c.orig	2005-06-07 10:21:29.000000000 -0700
+++ xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c	2005-06-07 10:21:29.000000000 -0700
@@ -10,6 +10,7 @@
 #include <linux/cpufreq.h>
 #include <linux/string.h>
 #include <linux/jiffies.h>
+#include <linux/percpu.h>
 
 #include <asm/timer.h>
 #include <asm/io.h>
@@ -35,8 +36,8 @@ extern spinlock_t i8253_lock;
 
 static int use_tsc;
 
-static unsigned long long monotonic_base;
-static u32 monotonic_offset;
+static DEFINE_PER_CPU(unsigned long long, monotonic_base);
+static DEFINE_PER_CPU(u32, monotonic_offset);
 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
 
 /* convert from cycles(64bits) => nanoseconds (64bits)
@@ -74,8 +75,20 @@ static inline unsigned long long cycles_
  */
 static unsigned long fast_gettimeoffset_quotient;
 
-extern u32 shadow_tsc_stamp;
-extern u64 shadow_system_time;
+extern DEFINE_PER_CPU(u64, shadow_tsc_stamp);
+extern DEFINE_PER_CPU(u64, shadow_system_time);
+
+unsigned long long get_full_tsc_offset(void)
+{
+	unsigned long long tsc;
+
+	/* Read the Time Stamp Counter */
+	rdtscll(tsc);
+
+	tsc -= per_cpu(shadow_tsc_stamp, smp_processor_id());
+
+	return cycles_2_ns(tsc);
+}
 
 static unsigned long get_offset_tsc(void)
 {
@@ -86,7 +99,7 @@ static unsigned long get_offset_tsc(void
 	rdtsc(eax,edx);
 
 	/* .. relative to previous jiffy (32 bits is enough) */
-	eax -= shadow_tsc_stamp;
+	eax -= per_cpu(shadow_tsc_stamp, smp_processor_id());
 
 	/*
          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
@@ -110,12 +123,13 @@ static unsigned long long monotonic_cloc
 {
 	unsigned long long last_offset, this_offset, base;
 	unsigned seq;
+	int cpu = smp_processor_id();
 	
 	/* atomically read monotonic base & last_offset */
 	do {
 		seq = read_seqbegin(&monotonic_lock);
-		last_offset = monotonic_offset;
-		base = monotonic_base;
+		last_offset = per_cpu(monotonic_offset, cpu);
+		base = per_cpu(monotonic_base, cpu);
 	} while (read_seqretry(&monotonic_lock, seq));
 
 	/* Read the Time Stamp Counter */
@@ -152,11 +166,12 @@ unsigned long long sched_clock(void)
 
 static void mark_offset_tsc(void)
 {
+	int cpu = smp_processor_id();
 
 	/* update the monotonic base value */
 	write_seqlock(&monotonic_lock);
-	monotonic_base = shadow_system_time;
-	monotonic_offset = shadow_tsc_stamp;
+	per_cpu(monotonic_base, cpu) = per_cpu(shadow_system_time, cpu);
+	per_cpu(monotonic_offset, cpu) = per_cpu(shadow_tsc_stamp, cpu);
 	write_sequnlock(&monotonic_lock);
 }
 
-- 
Don Fry
brazilnut@us.ibm.com

^ permalink raw reply	[flat|nested] 8+ messages in thread
* [PATCH] per-cpu timer changes
@ 2005-05-24  0:35 Don Fry
  0 siblings, 0 replies; 8+ messages in thread
From: Don Fry @ 2005-05-24  0:35 UTC (permalink / raw)
  To: Xen-devel

Here are some patches to help xen run on a system where the CPUs are not
synchronized so that the tsc counters drift.  Without this patch I get
lots of 'time went backwards' messages in the linux logs.  I still get
an occasional message, but not the number I did previously.

It has been tested on an x86 box with 4 hyper threaded cpus (8 logical),
but has not been tested with x86-64 or any other processor.  The patches
are against today's xeno-unstable.bk bits.

I am still trying to understand why I get any 'time went backwards'
messages, but thought I would post this now to get feedback as it is.


--- xeno-unstable.bk/xen/include/public/xen.h.orig	2005-05-23 17:24:21.000000000 -0700
+++ xeno-unstable.bk/xen/include/public/xen.h	2005-05-23 16:37:22.000000000 -0700
@@ -338,6 +338,21 @@ typedef struct
 } PACKED vcpu_info_t;                   /* 8 + arch */
 
 /*
+ * Xen/kernel shared data
+ *  per cpu timing information.
+ */
+typedef struct time_info_st
+{
+    u32			time_version1;
+    u32			time_version2;
+    tsc_timestamp_t	tsc_timestamp;	/* TSC at last update */
+    u64			system_time;	/* time, in nanoseconds, since boot */
+    u64			cpu_freq;	/* CPU frequency (Hz) */
+    u32			wc_sec;		/* Secs  00:00:00 UTC,  Jan 1, 1970. */
+    u32			wc_usec;	/* Usecs 00:00:00 UTC,  Jan 1, 1970. */
+} PACKED time_info_t;
+
+/*
  * Xen/kernel shared data -- pointer provided in start_info.
  * NB. We expect that this struct is smaller than a page.
  */
@@ -412,6 +427,7 @@ typedef struct shared_info_st
 
     arch_shared_info_t arch;
 
+    time_info_t vcpu_time[MAX_VIRT_CPUS];
 } PACKED shared_info_t;
 
 /*

--- xeno-unstable.bk/xen/common/schedule.c.orig	2005-05-23 17:25:39.000000000 -0700
+++ xeno-unstable.bk/xen/common/schedule.c	2005-05-23 16:42:24.000000000 -0700
@@ -57,6 +57,7 @@ string_param("sched", opt_sched);
 /* Various timer handlers. */
 static void s_timer_fn(unsigned long unused);
 static void t_timer_fn(unsigned long unused);
+static void tsc_timer_fn(unsigned long unused);
 static void dom_timer_fn(unsigned long data);
 
 /* This is global for now so that private implementations can reach it */
@@ -80,6 +81,7 @@ static struct scheduler ops;
 
 /* Per-CPU periodic timer sends an event to the currently-executing domain. */
 static struct ac_timer t_timer[NR_CPUS]; 
+static struct ac_timer tsc_timer[NR_CPUS];
 
 void free_domain_struct(struct domain *d)
 {
@@ -514,6 +516,7 @@ int idle_cpu(int cpu)
  * Timers: the scheduler utilises a number of timers
  * - s_timer: per CPU timer for preemption and scheduling decisions
  * - t_timer: per CPU periodic timer to send timer interrupt to current dom
+ * - tsc_timer: per CPU periodic timer to update time bases
  * - dom_timer: per domain timer to specifiy timeout values
  ****************************************************************************/
 
@@ -548,6 +551,18 @@ static void t_timer_fn(unsigned long unu
     add_ac_timer(&t_timer[cpu]);
 }
 
+/* Periodic tick timer: update time bases for per-cpu timing. */
+static void tsc_timer_fn(unsigned long unused)
+{
+    unsigned int        cpu = current->processor;
+
+    extern void percpu_ticks(void);
+    percpu_ticks();
+
+    tsc_timer[cpu].expires = NOW() + MILLISECS(500);
+    add_ac_timer(&tsc_timer[cpu]);
+}
+
 /* Domain timer function, sends a virtual timer interrupt to domain */
 static void dom_timer_fn(unsigned long data)
 {
@@ -578,6 +593,11 @@ void __init scheduler_init(void)
         t_timer[i].cpu      = i;
         t_timer[i].data     = 3;
         t_timer[i].function = &t_timer_fn;
+
+        init_ac_timer(&tsc_timer[i]);
+        tsc_timer[i].cpu      = i;
+        tsc_timer[i].data     = 4;
+        tsc_timer[i].function = &tsc_timer_fn;
     }
 
     schedule_data[0].idle = &idle0_exec_domain;
@@ -609,6 +629,9 @@ void schedulers_start(void) 
 
     t_timer_fn(0);
     smp_call_function((void *)t_timer_fn, NULL, 1, 1);
+
+    tsc_timer_fn(0);
+    smp_call_function((void *)tsc_timer_fn, NULL, 1, 1);
 }
 
 
--- xeno-unstable.bk/xen/arch/x86/smpboot.c.orig	2005-05-23 15:33:50.000000000 -0700
+++ xeno-unstable.bk/xen/arch/x86/smpboot.c	2005-05-23 16:41:56.000000000 -0700
@@ -400,6 +400,7 @@ void __init start_secondary(void)
 
     extern void percpu_traps_init(void);
     extern void cpu_init(void);
+    extern void setup_percpu_time(void);
 
     set_current(idle_task[cpu]);
     set_processor_id(cpu);
@@ -421,6 +422,8 @@ void __init start_secondary(void)
     construct_percpu_idt(cpu);
 #endif
 
+    setup_percpu_time();
+
     local_flush_tlb();
 
     startup_cpu_idle_loop();

--- xeno-unstable.bk/xen/arch/x86/time.c.orig	2005-05-23 17:25:12.000000000 -0700
+++ xeno-unstable.bk/xen/arch/x86/time.c	2005-05-23 16:42:35.000000000 -0700
@@ -50,6 +50,29 @@ static u64             full_tsc_irq;    
 static s_time_t        stime_irq;       /* System time at last 'time update' */
 static unsigned long   wc_sec, wc_usec; /* UTC time at last 'time update'.   */
 static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
+static time_info_t     percpu_time_info[NR_CPUS];
+
+void percpu_ticks(void)	/* advance this CPU's time base from its own TSC */
+{
+    int cpu = smp_processor_id();
+    time_info_t *t = &percpu_time_info[cpu];
+    u64 tsc, delta;
+    u64 quarter = t->cpu_freq >> 2;	/* TSC ticks per quarter second */
+    if (quarter == 0) return;	/* cpu_freq < 4: loop below would never end */
+    rdtscll(tsc);
+    delta = tsc - t->tsc_timestamp;
+    while (delta >= quarter) {	/* fold in each whole quarter second */
+	t->wc_usec += 1000000UL / 4;
+	t->system_time += 1000000000ULL / 4;
+	t->tsc_timestamp += quarter;
+	delta -= quarter;
+    }
+    /* Carry whole seconds out of wc_usec (1 s == 1000000 us). */
+    while (t->wc_usec >= 1000000UL) {
+	t->wc_sec += 1;
+	t->wc_usec -= 1000000UL;
+    }
+}
 
 void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
 {
@@ -278,20 +301,29 @@ static inline void __update_dom_time(str
 {
     struct domain *d  = ed->domain;
     shared_info_t *si = d->shared_info;
+    time_info_t *dom = &si->vcpu_time[ed->processor];
+    time_info_t *xen = &percpu_time_info[smp_processor_id()];
 
     spin_lock(&d->time_lock);
 
     si->time_version1++;
+    dom->time_version1++;
     wmb();
 
     si->cpu_freq       = cpu_freq;
+    dom->cpu_freq      = xen->cpu_freq;
     si->tsc_timestamp  = full_tsc_irq;
+    dom->tsc_timestamp = xen->tsc_timestamp;
     si->system_time    = stime_irq;
+    dom->system_time   = xen->system_time;
     si->wc_sec         = wc_sec;
+    dom->wc_sec        = xen->wc_sec;
     si->wc_usec        = wc_usec;
+    dom->wc_usec       = xen->wc_usec;
 
     wmb();
     si->time_version2++;
+    dom->time_version2++;
 
     spin_unlock(&d->time_lock);
 }
@@ -299,8 +331,11 @@ static inline void __update_dom_time(str
 void update_dom_time(struct exec_domain *ed)
 {
     unsigned long flags;
+    int cpu = smp_processor_id();
 
-    if ( ed->domain->shared_info->tsc_timestamp != full_tsc_irq )
+    if ( ed->domain->shared_info->tsc_timestamp != full_tsc_irq 
+    ||   ed->domain->shared_info->vcpu_time[ed->processor].tsc_timestamp !=
+	percpu_time_info[cpu].tsc_timestamp)
     {
         read_lock_irqsave(&time_lock, flags);
         __update_dom_time(ed);
@@ -313,6 +348,7 @@ void do_settime(unsigned long secs, unsi
 {
     s64 delta;
     long _usecs = (long)usecs;
+    int i;
 
     write_lock_irq(&time_lock);
 
@@ -327,6 +363,10 @@ void do_settime(unsigned long secs, unsi
 
     wc_sec  = secs;
     wc_usec = _usecs;
+    for (i=0; i<NR_CPUS; i++) {
+	percpu_time_info[i].wc_sec = wc_sec;
+	percpu_time_info[i].wc_usec = wc_usec;
+    }
 
     /* Others will pick up the change at the next tick. */
     __update_dom_time(current);
@@ -336,16 +376,39 @@ void do_settime(unsigned long secs, unsi
 }
 
 
+spinlock_t tsc_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Time setup for this processor.
+ */
+void __init setup_percpu_time(void)
+{
+    unsigned long flags;
+    unsigned long ticks_per_frac;
+    int cpu = smp_processor_id();
+
+    /* only have 1 cpu calibrate at a time */
+    spin_lock_irqsave(&tsc_lock, flags);
+    ticks_per_frac = calibrate_tsc();
+    spin_unlock_irqrestore(&tsc_lock, flags);
+
+    if (!ticks_per_frac)
+	panic("Error calibrating TSC\n");
+    percpu_time_info[cpu].cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
+    rdtscll(percpu_time_info[cpu].tsc_timestamp);
+    percpu_time_info[cpu].system_time = stime_irq;
+}
+
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time()
 {
     u64      scale;
     unsigned int cpu_ghz;
+    int i;
 
     cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
     for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
         continue;
-
     scale  = 1000000000LL << (32 + rdtsc_bitshift);
     scale /= cpu_freq;
     st_scale_f = scale & 0xffffffff;
@@ -358,6 +421,12 @@ int __init init_xen_time()
 
     /* Wallclock time starts as the initial RTC time. */
     wc_sec  = get_cmos_time();
+    for (i=0; i<NR_CPUS; i++) {
+	percpu_time_info[i].wc_sec = wc_sec;
+	percpu_time_info[i].wc_usec = 0;
+	percpu_time_info[i].system_time = stime_irq;
+	percpu_time_info[i].cpu_freq = cpu_freq;	// default speed
+    }
 
     printk("Time init:\n");
     printk(".... cpu_freq:    %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);

--- xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c.orig	2005-05-23 17:28:47.000000000 -0700
+++ xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c	2005-05-23 17:06:18.000000000 -0700
@@ -105,9 +105,13 @@ struct timer_opts *cur_timer = &timer_ts
 
 /* These are peridically updated in shared_info, and then copied here. */
 u32 shadow_tsc_stamp;
+DEFINE_PER_CPU(u32, shadow_tsc_stamp);
 u64 shadow_system_time;
+DEFINE_PER_CPU(u64, shadow_system_time);
 static u32 shadow_time_version;
+DEFINE_PER_CPU(u32, shadow_time_version);
 static struct timeval shadow_tv;
+static DEFINE_PER_CPU(struct timeval, shadow_tv);
 
 /*
  * We use this to ensure that gettimeofday() is monotonically increasing. We
@@ -171,23 +175,29 @@ __setup("independent_wallclock", __indep
 static void __get_time_values_from_xen(void)
 {
 	shared_info_t *s = HYPERVISOR_shared_info;
+	int cpu = smp_processor_id();
 
 	do {
 		shadow_time_version = s->time_version2;
+		per_cpu(shadow_time_version, cpu) = s->vcpu_time[cpu].time_version2;
 		rmb();
 		shadow_tv.tv_sec    = s->wc_sec;
 		shadow_tv.tv_usec   = s->wc_usec;
 		shadow_tsc_stamp    = (u32)s->tsc_timestamp;
 		shadow_system_time  = s->system_time;
+		per_cpu(shadow_tv.tv_sec, cpu)	= s->vcpu_time[cpu].wc_sec;
+		per_cpu(shadow_tv.tv_usec, cpu)	= s->vcpu_time[cpu].wc_usec;
+		per_cpu(shadow_tsc_stamp, cpu)	= (u32)s->vcpu_time[cpu].tsc_timestamp;
+		per_cpu(shadow_system_time, cpu) = s->vcpu_time[cpu].system_time;
 		rmb();
 	}
-	while (shadow_time_version != s->time_version1);
+	while (shadow_time_version != s->time_version1 ||  per_cpu(shadow_time_version, cpu) != s->vcpu_time[cpu].time_version1);
 
 	cur_timer->mark_offset();
 }
 
 #define TIME_VALUES_UP_TO_DATE \
- ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
+ ({ rmb(); ((per_cpu(shadow_time_version, cpu) == HYPERVISOR_shared_info->vcpu_time[cpu].time_version2) && (shadow_time_version == HYPERVISOR_shared_info->time_version2)); })
 
 /*
  * This version of gettimeofday has microsecond resolution
@@ -200,6 +210,7 @@ void do_gettimeofday(struct timeval *tv)
 	unsigned long max_ntp_tick;
 	unsigned long flags;
 	s64 nsec;
+	int cpu = smp_processor_id();
 
 	do {
 		unsigned long lost;
@@ -227,7 +238,7 @@ void do_gettimeofday(struct timeval *tv)
 		sec = xtime.tv_sec;
 		usec += (xtime.tv_nsec / NSEC_PER_USEC);
 
-		nsec = shadow_system_time - processed_system_time;
+		nsec = per_cpu(shadow_system_time, cpu) - per_cpu(processed_system_time, cpu);
 		__normalize_time(&sec, &nsec);
 		usec += (long)nsec / NSEC_PER_USEC;
 
@@ -273,6 +284,7 @@ int do_settimeofday(struct timespec *tv)
 	long wtm_nsec;
 	s64 nsec;
 	struct timespec xentime;
+	int cpu = smp_processor_id();
 
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
@@ -306,7 +318,7 @@ int do_settimeofday(struct timespec *tv)
 	 */
 	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
 
-	nsec -= (shadow_system_time - processed_system_time);
+	nsec -= (per_cpu(shadow_system_time, cpu) - per_cpu(processed_system_time, cpu));
 
 	__normalize_time(&sec, &nsec);
 	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -392,27 +404,25 @@ static inline void do_timer_interrupt(in
 					struct pt_regs *regs)
 {
 	time_t wtm_sec, sec;
-	s64 delta, delta_cpu, nsec;
+	s64 delta_cpu, nsec;
 	long sec_diff, wtm_nsec;
 	int cpu = smp_processor_id();
 
 	do {
 		__get_time_values_from_xen();
 
-		delta = delta_cpu = (s64)shadow_system_time +
-			((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
-		delta     -= processed_system_time;
-		delta_cpu -= per_cpu(processed_system_time, cpu);
+		delta_cpu = (s64)per_cpu(shadow_system_time, cpu) +
+			((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC)
+			- per_cpu(processed_system_time, cpu);
 	}
 	while (!TIME_VALUES_UP_TO_DATE);
 
-	if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
+	if (unlikely(delta_cpu < 0)) {
 		printk("Timer ISR/%d: Time went backwards: "
-		       "delta=%lld cpu_delta=%lld shadow=%lld "
-		       "off=%lld processed=%lld cpu_processed=%lld\n",
-		       cpu, delta, delta_cpu, shadow_system_time,
+		       "cpu_delta=%lld cpu_shadow=%lld "
+		       "off=%lld cpu_processed=%lld\n",
+		       cpu, delta_cpu, per_cpu(shadow_system_time, cpu),
 		       ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC), 
-		       processed_system_time,
 		       per_cpu(processed_system_time, cpu));
 		for (cpu = 0; cpu < num_online_cpus(); cpu++)
 			printk(" %d: %lld\n", cpu,
@@ -420,19 +430,15 @@ static inline void do_timer_interrupt(in
 		return;
 	}
 
-	/* System-wide jiffy work. */
-	while (delta >= NS_PER_TICK) {
-		delta -= NS_PER_TICK;
-		processed_system_time += NS_PER_TICK;
-		do_timer(regs);
-	}
-
 	/* Local CPU jiffy work. */
 	while (delta_cpu >= NS_PER_TICK) {
 		delta_cpu -= NS_PER_TICK;
 		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
 		update_process_times(user_mode(regs));
 		profile_tick(CPU_PROFILING, regs);
+		/* System-wide jiffy work. */
+		if (cpu == 0)
+			do_timer(regs);
 	}
 
 	if (cpu != 0)
@@ -447,19 +453,19 @@ static inline void do_timer_interrupt(in
 	    ((time_status & STA_UNSYNC) != 0) &&
 	    (xtime.tv_sec > (last_update_from_xen + 60))) {
 		/* Adjust shadow for jiffies that haven't updated xtime yet. */
-		shadow_tv.tv_usec -= 
+		per_cpu(shadow_tv.tv_usec, cpu) -= 
 			(jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
-		HANDLE_USEC_UNDERFLOW(shadow_tv);
+		HANDLE_USEC_UNDERFLOW(per_cpu(shadow_tv, cpu));
 
 		/*
 		 * Reset our running time counts if they are invalidated by
 		 * a warp backwards of more than 500ms.
 		 */
-		sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
+		sec_diff = xtime.tv_sec - per_cpu(shadow_tv.tv_sec, cpu);
 		if (unlikely(abs(sec_diff) > 1) ||
 		    unlikely(((sec_diff * USEC_PER_SEC) +
 			      (xtime.tv_nsec / NSEC_PER_USEC) -
-			      shadow_tv.tv_usec) > 500000)) {
+			      per_cpu(shadow_tv.tv_usec, cpu)) > 500000)) {
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
 			last_rtc_update = last_update_to_xen = 0;
 #endif
@@ -467,8 +473,8 @@ static inline void do_timer_interrupt(in
 		}
 
 		/* Update our unsynchronised xtime appropriately. */
-		sec = shadow_tv.tv_sec;
-		nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
+		sec = per_cpu(shadow_tv.tv_sec, cpu);
+		nsec = per_cpu(shadow_tv.tv_usec, cpu) * NSEC_PER_USEC;
 
 		__normalize_time(&sec, &nsec);
 		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -498,7 +504,7 @@ static inline void do_timer_interrupt(in
 		op.cmd = DOM0_SETTIME;
 		op.u.settime.secs        = tv.tv_sec;
 		op.u.settime.usecs       = tv.tv_usec;
-		op.u.settime.system_time = shadow_system_time;
+		op.u.settime.system_time = per_cpu(shadow_system_time, cpu);
 		HYPERVISOR_dom0_op(&op);
 
 		last_update_to_xen = xtime.tv_sec;
@@ -670,7 +676,7 @@ void __init time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 		-xtime.tv_sec, -xtime.tv_nsec);
 	processed_system_time = shadow_system_time;
-	per_cpu(processed_system_time, 0) = processed_system_time;
+	per_cpu(processed_system_time, 0) = per_cpu(shadow_system_time, 0);
 
 	if (timer_tsc_init.init(NULL) != 0)
 		BUG();
@@ -759,7 +765,7 @@ void time_resume(void)
 
 	/* Reset our own concept of passage of system time. */
 	processed_system_time = shadow_system_time;
-	per_cpu(processed_system_time, 0) = processed_system_time;
+	per_cpu(processed_system_time, 0) = per_cpu(shadow_system_time, 0);
 
 	/* Accept a warp in UTC (wall-clock) time. */
 	last_seen_tv.tv_sec = 0;
@@ -776,7 +782,7 @@ void local_setup_timer(void)
 
 	do {
 		seq = read_seqbegin(&xtime_lock);
-		per_cpu(processed_system_time, cpu) = shadow_system_time;
+		per_cpu(processed_system_time, cpu) = per_cpu(shadow_system_time, cpu);
 	} while (read_seqretry(&xtime_lock, seq));
 
 	per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);

--- xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c.orig	2005-05-23 17:29:10.000000000 -0700
+++ xeno-unstable.bk/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c	2005-05-23 17:04:11.000000000 -0700
@@ -10,6 +10,7 @@
 #include <linux/cpufreq.h>
 #include <linux/string.h>
 #include <linux/jiffies.h>
+#include <linux/percpu.h>
 
 #include <asm/timer.h>
 #include <asm/io.h>
@@ -35,8 +36,8 @@ extern spinlock_t i8253_lock;
 
 static int use_tsc;
 
-static unsigned long long monotonic_base;
-static u32 monotonic_offset;
+static DEFINE_PER_CPU(unsigned long long, monotonic_base);
+static DEFINE_PER_CPU(u32, monotonic_offset);
 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
 
 /* convert from cycles(64bits) => nanoseconds (64bits)
@@ -74,8 +75,8 @@ static inline unsigned long long cycles_
  */
 static unsigned long fast_gettimeoffset_quotient;
 
-extern u32 shadow_tsc_stamp;
-extern u64 shadow_system_time;
+extern DEFINE_PER_CPU(u32, shadow_tsc_stamp);
+extern DEFINE_PER_CPU(u64, shadow_system_time);
 
 static unsigned long get_offset_tsc(void)
 {
@@ -86,7 +87,7 @@ static unsigned long get_offset_tsc(void
 	rdtsc(eax,edx);
 
 	/* .. relative to previous jiffy (32 bits is enough) */
-	eax -= shadow_tsc_stamp;
+	eax -= per_cpu(shadow_tsc_stamp, smp_processor_id());
 
 	/*
          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
@@ -110,12 +111,13 @@ static unsigned long long monotonic_cloc
 {
 	unsigned long long last_offset, this_offset, base;
 	unsigned seq;
+	int cpu = smp_processor_id();
 	
 	/* atomically read monotonic base & last_offset */
 	do {
 		seq = read_seqbegin(&monotonic_lock);
-		last_offset = monotonic_offset;
-		base = monotonic_base;
+		last_offset = per_cpu(monotonic_offset, cpu);
+		base = per_cpu(monotonic_base, cpu);
 	} while (read_seqretry(&monotonic_lock, seq));
 
 	/* Read the Time Stamp Counter */
@@ -152,11 +154,12 @@ unsigned long long sched_clock(void)
 
 static void mark_offset_tsc(void)
 {
+	int cpu = smp_processor_id();
 
 	/* update the monotonic base value */
 	write_seqlock(&monotonic_lock);
-	monotonic_base = shadow_system_time;
-	monotonic_offset = shadow_tsc_stamp;
+	per_cpu(monotonic_base, cpu) = per_cpu(shadow_system_time, cpu);
+	per_cpu(monotonic_offset, cpu) = per_cpu(shadow_tsc_stamp, cpu);
 	write_sequnlock(&monotonic_lock);
 }
 
-- 
Don Fry
brazilnut@us.ibm.com

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2005-06-07 21:16 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-05-24  1:20 [PATCH] per-cpu timer changes Ian Pratt
2005-05-26 17:34 ` Don Fry
2005-05-26 18:01   ` Keir Fraser
2005-05-26 18:06     ` Andrew Thompson
2005-05-26 18:03   ` Nivedita Singhvi
2005-05-26 19:25     ` Don Fry
  -- strict thread matches above, loose matches on Subject: below --
2005-06-07 21:16 Don Fry
2005-05-24  0:35 Don Fry

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.