All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] clocksource=tsc
@ 2008-07-12 21:38 Dan Magenheimer
  2008-07-13  3:59 ` Dan Magenheimer
  2008-07-14  9:24 ` Keir Fraser
  0 siblings, 2 replies; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-12 21:38 UTC (permalink / raw)
  To: Xen-Devel (E-mail), Keir Fraser, dan.magenheimer@oracle.com; +Cc: Dave Winchell

[-- Attachment #1: Type: text/plain, Size: 817 bytes --]

Attached patch adds clocksource=tsc boot option that
uses TSC as clocksource.  This option should only be
used on machines where TSC is known to be synchronized
across all processors.  A future TODO is to dynamically
determine if this is the case.

TSC may "beat" with another clocksource, resulting in
cross-processor Xen system time skew.  This skew can
be visible in PV guests and can appear as "time is stopped"
in hvm guests.  On some systems, this patch can reduce skew
by 30x or more.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>

===================================
Thanks... for the memory
I really could use more / My throughput's on the floor
The balloon is flat / My swap disk's fat / I've OOM's in store
Overcommitted so much
(with apologies to the late great Bob Hope)

[-- Attachment #2: tscstable.patch --]
[-- Type: application/octet-stream, Size: 4654 bytes --]

diff -r bd97e45e073a xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Jul 08 09:28:50 2008 +0100
+++ b/xen/arch/x86/time.c	Sat Jul 12 14:58:44 2008 -0600
@@ -452,6 +452,24 @@ static int init_pmtimer(struct platform_
 }
 
 /************************************************************
+ * PLATFORM TIMER 4: TSC
+ */
+
+static bool_t clocksource_is_tsc = 0;
+static u64 tsc_freq;
+
+static int init_tsctimer(struct platform_timesource *pts)
+{
+    /* TODO: evaluate stability of TSC here, return 0 if not stable */
+    pts->name = "TSC";
+    pts->frequency = tsc_freq;
+    pts->read_counter = 0;  /* unused as of now */
+    pts->counter_bits = 64;
+    clocksource_is_tsc = 1;
+    return 1;
+}
+
+/************************************************************
  * GENERIC PLATFORM TIMER INFRASTRUCTURE
  */
 
@@ -483,16 +501,28 @@ static void plt_overflow(void *unused)
 
 static s_time_t __read_platform_stime(u64 platform_time)
 {
-    u64 diff = platform_time - platform_timer_stamp;
+    u64 diff, tsc;
+
+    if ( clocksource_is_tsc )
+    {
+        rdtscll(tsc);
+        return scale_delta(tsc, &plt_scale);
+    }
+    diff = platform_time - platform_timer_stamp;
     ASSERT(spin_is_locked(&platform_timer_lock));
     return (stime_platform_stamp + scale_delta(diff, &plt_scale));
 }
 
 static s_time_t read_platform_stime(void)
 {
-    u64 count;
+    u64 count, tsc;
     s_time_t stime;
 
+    if ( clocksource_is_tsc )
+    {
+        rdtscll(tsc);
+        return scale_delta(tsc, &plt_scale);
+    }
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stime = __read_platform_stime(count);
@@ -506,6 +536,8 @@ static void platform_time_calibration(vo
     u64 count;
     s_time_t stamp;
 
+    if ( clocksource_is_tsc )
+        return;
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stamp = __read_platform_stime(count);
@@ -516,6 +548,8 @@ static void platform_time_calibration(vo
 
 static void resume_platform_timer(void)
 {
+    if ( clocksource_is_tsc )
+        return;
     /* No change in platform_stime across suspend/resume. */
     platform_timer_stamp = plt_stamp64;
     plt_stamp = plt_src.read_counter();
@@ -536,6 +570,8 @@ static void init_platform_timer(void)
             rc = init_cyclone(pts);
         else if ( !strcmp(opt_clocksource, "acpi") )
             rc = init_pmtimer(pts);
+        else if ( !strcmp(opt_clocksource, "tsc") )
+            rc = init_tsctimer(pts);
 
         if ( rc <= 0 )
             printk("WARNING: %s clocksource '%s'.\n",
@@ -549,16 +585,20 @@ static void init_platform_timer(void)
          !init_pmtimer(pts) )
         init_pit(pts);
 
-    plt_mask = (u32)~0u >> (32 - pts->counter_bits);
-
     set_time_scale(&plt_scale, pts->frequency);
 
-    plt_overflow_period = scale_delta(
-        1ull << (pts->counter_bits-1), &plt_scale);
-    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
-    plt_overflow(NULL);
+    if (pts->counter_bits != 64 )
+    {
+        plt_mask = (u32)~0u >> (32 - pts->counter_bits);
 
-    platform_timer_stamp = plt_stamp64;
+        plt_overflow_period = scale_delta(
+            1ull << (pts->counter_bits-1), &plt_scale);
+        init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+        plt_overflow(NULL);
+
+        platform_timer_stamp = plt_stamp64;
+    }
+
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
@@ -580,7 +620,8 @@ void cstate_restore_tsc(void)
     u32    plt_count_delta;
     u64    tsc_delta;
 
-    if (!tsc_invariant){
+    if ( !tsc_invariant && !clocksource_is_tsc )
+    {
         t = &this_cpu(cpu_time);
 
         /* if platform counter overflow happens, interrupt will bring CPU from
@@ -687,14 +728,18 @@ static unsigned long get_cmos_time(void)
 
 s_time_t get_s_time(void)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    struct cpu_time *t;
     u64 tsc, delta;
     s_time_t now;
 
     rdtscll(tsc);
-    delta = tsc - t->local_tsc_stamp;
-    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
-
+    if ( clocksource_is_tsc )
+        now = scale_delta(tsc, &plt_scale);
+    else {
+        t = &this_cpu(cpu_time);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+    }
     return now;
 }
 
@@ -996,6 +1041,7 @@ void __init early_time_init(void)
 {
     u64 tmp = init_pit_and_calibrate_tsc();
 
+    tsc_freq = tmp;
     set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
 
     do_div(tmp, 1000);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-12 21:38 [PATCH] clocksource=tsc Dan Magenheimer
@ 2008-07-13  3:59 ` Dan Magenheimer
  2008-07-14  9:24 ` Keir Fraser
  1 sibling, 0 replies; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-13  3:59 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail), Keir Fraser; +Cc: Dave Winchell

[-- Attachment #1: Type: text/plain, Size: 2010 bytes --]

I meant to add... although this appears to be new functionality
and thus too late for 3.3, I'd like to argue that it is really
a bug fix:  Let's say there are good-tsc machines and bad-tsc
machines.  Prior to cset 17716 ("hvm: Build guest timers on
monotonic system time") all guest platform timer reads were
built directly on top of physical TSC.  This was a big problem
for bad-tsc SMP machines, but fine for good-tsc machines.  Cset 17716
made things better for bad-tsc machines, but actually made them
worse for good-tsc SMP machines (because of the inter-processor
skew).

So users with good-tsc machines moving from a pre-3.3 release
to 3.3 may see time "stopped" in 3.3 when there was no problem
with pre-3.3.    The clocksource=tsc patch partially
fixes this "regression" for good-tsc machines... at least it
provides a boot option to fix it.

Thanks,
Dan

> -----Original Message-----
> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> Sent: Saturday, July 12, 2008 3:38 PM
> To: Xen-Devel (E-mail); Keir Fraser; dan.magenheimer@oracle.com
> Cc: Dave Winchell
> Subject: [PATCH] clocksource=tsc
> 
> 
> Attached patch adds clocksource=tsc boot option that
> uses TSC as clocksource.  This option should only be
> used on machines where TSC is known to be synchronized
> across all processors.  A future TODO is to dynamically
> determine if this is the case.
> 
> TSC may "beat" with another clocksource, resulting in
> cross-processor Xen system time skew.  This skew can
> be visible in PV guests and can appear as "time is stopped"
> in hvm guests.  On some systems, this patch can reduce skew
> by 30x or more.
> 
> Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
> 
> ===================================
> Thanks... for the memory
> I really could use more / My throughput's on the floor
> The balloon is flat / My swap disk's fat / I've OOM's in store
> Overcommitted so much
> (with apologies to the late great Bob Hope)
> 

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] clocksource=tsc
  2008-07-12 21:38 [PATCH] clocksource=tsc Dan Magenheimer
  2008-07-13  3:59 ` Dan Magenheimer
@ 2008-07-14  9:24 ` Keir Fraser
  2008-07-14 17:59   ` Dan Magenheimer
  1 sibling, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-14  9:24 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

I'm sympathetic to the patch, but it looks like an ugly hack right now. Most
of the clocksource_is_tsc special cases will fall away if you, e.g., have
platform_timer_stamp = stime_platform_stamp = local_tsc_stamp =
stime_local_stamp = 0; and provide a plt->read_counter() function; and
accept that for now you will unnecessarily take the platform-timer spinlock;
etc. Also it is platform timer 5, not 4!

Clean it up and it'll go in. Since it should clearly only affect behaviour
when the new clocksource is explicitly selected, I'll take this patch
further into 3.3 freeze than usual -- it's more important to me that it's
clean.

 Thanks,
 Keir

On 12/7/08 22:38, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> Attached patch adds clocksource=tsc boot option that
> uses TSC as clocksource.  This option should only be
> used on machines where TSC is known to be synchronized
> across all processors.  A future TODO is to dynamically
> determine if this is the case.
> 
> TSC may "beat" with another clocksource, resulting in
> cross-processor Xen system time skew.  This skew can
> be visible in PV guests and can appear as "time is stopped"
> in hvm guests.  On some systems, this patch can reduce skew
> by 30x or more.
> 
> Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
> 
> ===================================
> Thanks... for the memory
> I really could use more / My throughput's on the floor
> The balloon is flat / My swap disk's fat / I've OOM's in store
> Overcommitted so much
> (with apologies to the late great Bob Hope)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-14  9:24 ` Keir Fraser
@ 2008-07-14 17:59   ` Dan Magenheimer
  2008-07-15  0:35     ` Tian, Kevin
  2008-07-15 13:05     ` Keir Fraser
  0 siblings, 2 replies; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-14 17:59 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

[-- Attachment #1: Type: text/plain, Size: 2503 bytes --]

> I'm sympathetic to the patch, but it looks like an ugly hack 
> right now.

> Since it should clearly only affect behaviour when the new
> clocksource is explicitly selected...

Actually the ugliness is because the read_counter function
and plt_mask are both u32 and I worried that changing them to
a u64 would be too invasive.

So here's two possible replacement patches, one with those
switched to u64 and the other with separate 32- and 64-bit
read_counter functions.  Both are boot-tested with and
without clocksource=tsc.

Dan

> -----Original Message-----
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Monday, July 14, 2008 3:24 AM
> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
> Cc: Dave Winchell
> Subject: Re: [PATCH] clocksource=tsc
> 
> 
> I'm sympathetic to the patch, but it looks like an ugly hack 
> right now. Most
> of the clocksource_is_tsc special cases will fall away if 
> you, e.g., have
> platform_timer_stamp = stime_platform_stamp = local_tsc_stamp =
> stime_local_stamp = 0; and provide a plt->read_counter() function; and
> accept that for now you will unnecessarily take the 
> platform-timer spinlock;
> etc. Also it is platform timer 5, not 4!
> 
> Clean it up and it'll go in. Since it should clearly only 
> affect behaviour
> when the new clocksource is explicitly selected, I'll take this patch
> further into 3.3 freeze than usual -- it's more important to 
> me that it's
> clean.
> 
>  Thanks,
>  Keir
> 
> On 12/7/08 22:38, "Dan Magenheimer" 
> <dan.magenheimer@oracle.com> wrote:
> 
> > Attached patch adds clocksource=tsc boot option that
> > uses TSC as clocksource.  This option should only be
> > used on machines where TSC is known to be synchronized
> > across all processors.  A future TODO is to dynamically
> > determine if this is the case.
> >
> > TSC may "beat" with another clocksource, resulting in
> > cross-processor Xen system time skew.  This skew can
> > be visible in PV guests and can appear as "time is stopped"
> > in hvm guests.  On some systems, this patch can reduce skew
> > by 30x or more.
> >
> > Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
> >
> > ===================================
> > Thanks... for the memory
> > I really could use more / My throughput's on the floor
> > The balloon is flat / My swap disk's fat / I've OOM's in store
> > Overcommitted so much
> > (with apologies to the late great Bob Hope)
> 
> 
>

[-- Attachment #2: tscstable2.patch --]
[-- Type: application/octet-stream, Size: 6953 bytes --]

diff -r bd97e45e073a xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Jul 08 09:28:50 2008 +0100
+++ b/xen/arch/x86/time.c	Mon Jul 14 11:10:09 2008 -0600
@@ -54,14 +54,14 @@ struct cpu_time {
     s_time_t stime_local_stamp;
     s_time_t stime_master_stamp;
     struct time_scale tsc_scale;
-    u32 cstate_plt_count_stamp;
+    u64 cstate_plt_count_stamp;
     struct timer calibration_timer;
 };
 
 struct platform_timesource {
     char *name;
     u64 frequency;
-    u32 (*read_counter)(void);
+    u64 (*read_counter)(void);
     int counter_bits;
 };
 
@@ -311,7 +311,7 @@ static char *freq_string(u64 freq)
  * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
  */
 
-static u32 read_pit_count(void)
+static u64 read_pit_count(void)
 {
     u16 count16;
     u32 count32;
@@ -327,7 +327,7 @@ static u32 read_pit_count(void)
 
     spin_unlock_irqrestore(&pit_lock, flags);
 
-    return count32;
+    return (u64)count32;
 }
 
 static void init_pit(struct platform_timesource *pts)
@@ -343,9 +343,9 @@ static void init_pit(struct platform_tim
  * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
  */
 
-static u32 read_hpet_count(void)
+static u64 read_hpet_count(void)
 {
-    return hpet_read32(HPET_COUNTER);
+    return (u64)hpet_read32(HPET_COUNTER);
 }
 
 static int init_hpet(struct platform_timesource *pts)
@@ -383,9 +383,9 @@ int use_cyclone;
 /* Cyclone MPMC0 register. */
 static volatile u32 *cyclone_timer;
 
-static u32 read_cyclone_count(void)
+static u64 read_cyclone_count(void)
 {
-    return *cyclone_timer;
+    return (u64)*cyclone_timer;
 }
 
 static volatile u32 *map_cyclone_reg(unsigned long regaddr)
@@ -433,9 +433,9 @@ u32 pmtmr_ioport;
 /* ACPI PM timer ticks at 3.579545 MHz. */
 #define ACPI_PM_FREQUENCY 3579545
 
-static u32 read_pmtimer_count(void)
+static u64 read_pmtimer_count(void)
 {
-    return inl(pmtmr_ioport);
+    return (u64)inl(pmtmr_ioport);
 }
 
 static int init_pmtimer(struct platform_timesource *pts)
@@ -452,15 +452,38 @@ static int init_pmtimer(struct platform_
 }
 
 /************************************************************
+ * PLATFORM TIMER 5: TSC
+ */
+
+static u64 tsc_freq;
+
+static u64 read_tsc_count(void)
+{
+        u64 tsc;
+	rdtscll(tsc);
+        return tsc;
+}
+
+static int init_tsctimer(struct platform_timesource *pts)
+{
+    /* TODO: evaluate stability of TSC here, return 0 if not stable */
+    pts->name = "TSC";
+    pts->frequency = tsc_freq;
+    pts->read_counter = read_tsc_count;
+    pts->counter_bits = 64;
+    return 1;
+}
+
+/************************************************************
  * GENERIC PLATFORM TIMER INFRASTRUCTURE
  */
 
 static struct platform_timesource plt_src; /* details of chosen timesource  */
-static u32 plt_mask;             /* hardware-width mask                     */
+static u64 plt_mask;             /* hardware-width mask                     */
 static u64 plt_overflow_period;  /* ns between calls to plt_overflow()      */
 static struct time_scale plt_scale; /* scale: platform counter -> nanosecs  */
 
-/* Protected by platform_timer_lock. */
+/* Protected by platform_timer_lock.  Must be zero for 64-bit clocksources */
 static DEFINE_SPINLOCK(platform_timer_lock);
 static s_time_t stime_platform_stamp; /* System time at below platform time */
 static u64 platform_timer_stamp;      /* Platform time at above system time */
@@ -472,8 +495,9 @@ static void plt_overflow(void *unused)
 {
     u32 count;
 
+    ASSERT(plt_src.counter_bits <= 32);
     spin_lock(&platform_timer_lock);
-    count = plt_src.read_counter();
+    count = (u32)plt_src.read_counter();
     plt_stamp64 += (count - plt_stamp) & plt_mask;
     plt_stamp = count;
     spin_unlock(&platform_timer_lock);
@@ -506,6 +530,8 @@ static void platform_time_calibration(vo
     u64 count;
     s_time_t stamp;
 
+    if ( plt_src.counter_bits == 64 )
+        return;
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stamp = __read_platform_stime(count);
@@ -516,6 +542,8 @@ static void platform_time_calibration(vo
 
 static void resume_platform_timer(void)
 {
+    if ( plt_src.counter_bits == 64 )
+        return;
     /* No change in platform_stime across suspend/resume. */
     platform_timer_stamp = plt_stamp64;
     plt_stamp = plt_src.read_counter();
@@ -536,6 +564,8 @@ static void init_platform_timer(void)
             rc = init_cyclone(pts);
         else if ( !strcmp(opt_clocksource, "acpi") )
             rc = init_pmtimer(pts);
+        else if ( !strcmp(opt_clocksource, "tsc") )
+            rc = init_tsctimer(pts);
 
         if ( rc <= 0 )
             printk("WARNING: %s clocksource '%s'.\n",
@@ -549,16 +579,26 @@ static void init_platform_timer(void)
          !init_pmtimer(pts) )
         init_pit(pts);
 
-    plt_mask = (u32)~0u >> (32 - pts->counter_bits);
-
     set_time_scale(&plt_scale, pts->frequency);
 
-    plt_overflow_period = scale_delta(
-        1ull << (pts->counter_bits-1), &plt_scale);
-    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
-    plt_overflow(NULL);
+    if (pts->counter_bits != 64 )
+    {
+        plt_mask = (u32)~0u >> (32 - pts->counter_bits);
 
-    platform_timer_stamp = plt_stamp64;
+        plt_overflow_period = scale_delta(
+            1ull << (pts->counter_bits-1), &plt_scale);
+        init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+        plt_overflow(NULL);
+
+        platform_timer_stamp = plt_stamp64;
+    }
+    else
+    {
+        plt_mask = -1LL;
+        platform_timer_stamp =  stime_platform_stamp = 0;
+        plt_stamp = plt_stamp64 = 0;
+    }
+
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
@@ -577,10 +617,11 @@ void cstate_restore_tsc(void)
 void cstate_restore_tsc(void)
 {
     struct cpu_time *t;
-    u32    plt_count_delta;
+    u64    plt_count_delta;
     u64    tsc_delta;
 
-    if (!tsc_invariant){
+    if ( !tsc_invariant )
+    {
         t = &this_cpu(cpu_time);
 
         /* if platform counter overflow happens, interrupt will bring CPU from
@@ -687,14 +728,19 @@ static unsigned long get_cmos_time(void)
 
 s_time_t get_s_time(void)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    struct cpu_time *t;
     u64 tsc, delta;
     s_time_t now;
 
     rdtscll(tsc);
-    delta = tsc - t->local_tsc_stamp;
-    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
-
+    if ( plt_src.counter_bits == 64 )
+        now = scale_delta(tsc, &plt_scale);
+    else
+    {
+        t = &this_cpu(cpu_time);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+    }
     return now;
 }
 
@@ -996,6 +1042,7 @@ void __init early_time_init(void)
 {
     u64 tmp = init_pit_and_calibrate_tsc();
 
+    tsc_freq = tmp;
     set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
 
     do_div(tmp, 1000);

[-- Attachment #3: tscstable3.patch --]
[-- Type: application/octet-stream, Size: 8687 bytes --]

diff -r bd97e45e073a xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Jul 08 09:28:50 2008 +0100
+++ b/xen/arch/x86/time.c	Mon Jul 14 11:50:54 2008 -0600
@@ -54,15 +54,17 @@ struct cpu_time {
     s_time_t stime_local_stamp;
     s_time_t stime_master_stamp;
     struct time_scale tsc_scale;
-    u32 cstate_plt_count_stamp;
+    u64 cstate_plt_count_stamp;
     struct timer calibration_timer;
 };
 
 struct platform_timesource {
     char *name;
     u64 frequency;
-    u32 (*read_counter)(void);
+    u32 (*read_counter32)(void);
+    u64 (*read_counter64)(void);
     int counter_bits;
+    u64 mask;
 };
 
 static DEFINE_PER_CPU(struct cpu_time, cpu_time);
@@ -334,8 +336,9 @@ static void init_pit(struct platform_tim
 {
     pts->name = "PIT";
     pts->frequency = CLOCK_TICK_RATE;
-    pts->read_counter = read_pit_count;
+    pts->read_counter32 = read_pit_count;
     pts->counter_bits = 32;
+    pts->mask = (u32)0xffffffff;
     using_pit = 1;
 }
 
@@ -357,8 +360,9 @@ static int init_hpet(struct platform_tim
 
     pts->name = "HPET";
     pts->frequency = hpet_rate;
-    pts->read_counter = read_hpet_count;
+    pts->read_counter32 = read_hpet_count;
     pts->counter_bits = 32;
+    pts->mask = (u32)0xffffffff;
 
     return 1;
 }
@@ -418,8 +422,9 @@ static int init_cyclone(struct platform_
 
     pts->name = "IBM Cyclone";
     pts->frequency = CYCLONE_TIMER_FREQ;
-    pts->read_counter = read_cyclone_count;
+    pts->read_counter32 = read_cyclone_count;
     pts->counter_bits = 32;
+    pts->mask = (u32)0xffffffff;
 
     return 1;
 }
@@ -445,9 +450,34 @@ static int init_pmtimer(struct platform_
 
     pts->name = "ACPI PM Timer";
     pts->frequency = ACPI_PM_FREQUENCY;
-    pts->read_counter = read_pmtimer_count;
+    pts->read_counter32 = read_pmtimer_count;
     pts->counter_bits = 24;
+    pts->mask = (u32)0xffffff;
 
+    return 1;
+}
+
+/************************************************************
+ * PLATFORM TIMER 5: TSC
+ */
+
+static u64 tsc_freq;
+
+static u64 read_tsc_count(void)
+{
+        u64 tsc;
+	rdtscll(tsc);
+        return tsc;
+}
+
+static int init_tsctimer(struct platform_timesource *pts)
+{
+    /* TODO: evaluate stability of TSC here, return 0 if not stable */
+    pts->name = "TSC";
+    pts->frequency = tsc_freq;
+    pts->read_counter64 = read_tsc_count;
+    pts->counter_bits = 64;
+    pts->mask = -1LL;
     return 1;
 }
 
@@ -456,11 +486,10 @@ static int init_pmtimer(struct platform_
  */
 
 static struct platform_timesource plt_src; /* details of chosen timesource  */
-static u32 plt_mask;             /* hardware-width mask                     */
 static u64 plt_overflow_period;  /* ns between calls to plt_overflow()      */
 static struct time_scale plt_scale; /* scale: platform counter -> nanosecs  */
 
-/* Protected by platform_timer_lock. */
+/* Protected by platform_timer_lock.  Must be zero for 64-bit clocksources */
 static DEFINE_SPINLOCK(platform_timer_lock);
 static s_time_t stime_platform_stamp; /* System time at below platform time */
 static u64 platform_timer_stamp;      /* Platform time at above system time */
@@ -468,13 +497,22 @@ static u32 plt_stamp;            /* hard
 static u32 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+static inline u64 plt_read_counter(void)
+{
+    if ( plt_src.counter_bits == 64 )
+        return plt_src.read_counter64();
+    else
+        return plt_src.read_counter32();
+}
+
 static void plt_overflow(void *unused)
 {
     u32 count;
 
+    ASSERT(plt_src.counter_bits <= 32);
     spin_lock(&platform_timer_lock);
-    count = plt_src.read_counter();
-    plt_stamp64 += (count - plt_stamp) & plt_mask;
+    count = (u32)plt_src.read_counter32();
+    plt_stamp64 += (count - plt_stamp) & plt_src.mask;
     plt_stamp = count;
     spin_unlock(&platform_timer_lock);
 
@@ -494,7 +532,7 @@ static s_time_t read_platform_stime(void
     s_time_t stime;
 
     spin_lock(&platform_timer_lock);
-    count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
+    count = plt_stamp64 + ((plt_read_counter() - plt_stamp) & plt_src.mask);
     stime = __read_platform_stime(count);
     spin_unlock(&platform_timer_lock);
 
@@ -506,8 +544,10 @@ static void platform_time_calibration(vo
     u64 count;
     s_time_t stamp;
 
+    if ( plt_src.counter_bits == 64 )
+        return;
     spin_lock(&platform_timer_lock);
-    count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
+    count = plt_stamp64 + ((plt_read_counter() - plt_stamp) & plt_src.mask);
     stamp = __read_platform_stime(count);
     stime_platform_stamp = stamp;
     platform_timer_stamp = count;
@@ -516,9 +556,11 @@ static void platform_time_calibration(vo
 
 static void resume_platform_timer(void)
 {
+    if ( plt_src.counter_bits == 64 )
+        return;
     /* No change in platform_stime across suspend/resume. */
     platform_timer_stamp = plt_stamp64;
-    plt_stamp = plt_src.read_counter();
+    plt_stamp = plt_read_counter();
 }
 
 static void init_platform_timer(void)
@@ -536,6 +578,8 @@ static void init_platform_timer(void)
             rc = init_cyclone(pts);
         else if ( !strcmp(opt_clocksource, "acpi") )
             rc = init_pmtimer(pts);
+        else if ( !strcmp(opt_clocksource, "tsc") )
+            rc = init_tsctimer(pts);
 
         if ( rc <= 0 )
             printk("WARNING: %s clocksource '%s'.\n",
@@ -549,16 +593,23 @@ static void init_platform_timer(void)
          !init_pmtimer(pts) )
         init_pit(pts);
 
-    plt_mask = (u32)~0u >> (32 - pts->counter_bits);
-
     set_time_scale(&plt_scale, pts->frequency);
 
-    plt_overflow_period = scale_delta(
-        1ull << (pts->counter_bits-1), &plt_scale);
-    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
-    plt_overflow(NULL);
+    if (pts->counter_bits != 64 )
+    {
+        plt_overflow_period = scale_delta(
+            1ull << (pts->counter_bits-1), &plt_scale);
+        init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+        plt_overflow(NULL);
 
-    platform_timer_stamp = plt_stamp64;
+        platform_timer_stamp = plt_stamp64;
+    }
+    else
+    {
+        platform_timer_stamp =  stime_platform_stamp = 0;
+        plt_stamp = plt_stamp64 = 0;
+    }
+
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
@@ -569,7 +620,7 @@ void cstate_save_tsc(void)
     struct cpu_time *t = &this_cpu(cpu_time);
 
     if (!tsc_invariant){
-        t->cstate_plt_count_stamp = plt_src.read_counter();
+        t->cstate_plt_count_stamp = plt_read_counter();
         rdtscll(t->cstate_tsc_stamp);
     }
 }
@@ -577,10 +628,11 @@ void cstate_restore_tsc(void)
 void cstate_restore_tsc(void)
 {
     struct cpu_time *t;
-    u32    plt_count_delta;
+    u64    plt_count_delta;
     u64    tsc_delta;
 
-    if (!tsc_invariant){
+    if ( !tsc_invariant )
+    {
         t = &this_cpu(cpu_time);
 
         /* if platform counter overflow happens, interrupt will bring CPU from
@@ -589,7 +641,7 @@ void cstate_restore_tsc(void)
            is enough for delta calculation
          */
         plt_count_delta = 
-            (plt_src.read_counter() - t->cstate_plt_count_stamp) & plt_mask;
+            (plt_read_counter() - t->cstate_plt_count_stamp) & plt_src.mask;
         tsc_delta = scale_delta(plt_count_delta, &plt_scale)*cpu_khz/1000000UL;
         wrmsrl(MSR_IA32_TSC,  t->cstate_tsc_stamp + tsc_delta);
     }
@@ -687,14 +739,19 @@ static unsigned long get_cmos_time(void)
 
 s_time_t get_s_time(void)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    struct cpu_time *t;
     u64 tsc, delta;
     s_time_t now;
 
     rdtscll(tsc);
-    delta = tsc - t->local_tsc_stamp;
-    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
-
+    if ( plt_src.counter_bits == 64 )
+        now = scale_delta(tsc, &plt_scale);
+    else
+    {
+        t = &this_cpu(cpu_time);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+    }
     return now;
 }
 
@@ -957,7 +1014,8 @@ void init_percpu_time(void)
 
     local_irq_save(flags);
     rdtscll(t->local_tsc_stamp);
-    now = !plt_src.read_counter ? 0 : read_platform_stime();
+    now = ( !plt_src.read_counter32 && !plt_src.read_counter64 ) ? 0 :
+        read_platform_stime();
     local_irq_restore(flags);
 
     t->stime_master_stamp = now;
@@ -996,6 +1054,7 @@ void __init early_time_init(void)
 {
     u64 tmp = init_pit_and_calibrate_tsc();
 
+    tsc_freq = tmp;
     set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
 
     do_div(tmp, 1000);

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: RE: [PATCH] clocksource=tsc
  2008-07-14 17:59   ` Dan Magenheimer
@ 2008-07-15  0:35     ` Tian, Kevin
  2008-07-17 23:47       ` Dan Magenheimer
  2008-07-15 13:05     ` Keir Fraser
  1 sibling, 1 reply; 28+ messages in thread
From: Tian, Kevin @ 2008-07-15  0:35 UTC (permalink / raw)
  To: dan.magenheimer, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

>From: Dan Magenheimer
>Sent: 2008年7月15日 2:00
>
>> I'm sympathetic to the patch, but it looks like an ugly hack 
>> right now.
>
>> Since it should clearly only affect behaviour when the new
>> clocksource is explicitly selected...
>
>Actually the ugliness is because the read_counter function
>and plt_mask are both u32 and I worried that changing them to
>a u64 would be too invasive.
>
>So here's two possible replacement patches, one with those
>switched to u64 and the other with separate 32- and 64-bit
>read_counter functions.  Both are boot-tested with and
>without clocksource=tsc.
>
>Dan

Boot option is good enough for now, but finally I guess you may
need some type of dynamic clocksource switch mechanism,
just like what Linux does today. User may be not sure of the
specific platform. Some features may affect TSC stabilitity, e.g.
Px may have freq change and Cx may have TSC stop on some
platform. On those path a mark_tsc_unstable step is required
to switch current clocksource to other available platfrom timer
on the fly. So besides the boot option, Xen itself needs to code
such condition checks for TSC stability. :-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] clocksource=tsc
  2008-07-14 17:59   ` Dan Magenheimer
  2008-07-15  0:35     ` Tian, Kevin
@ 2008-07-15 13:05     ` Keir Fraser
  2008-07-15 14:44       ` Dan Magenheimer
  1 sibling, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-15 13:05 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

On 14/7/08 18:59, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

>> I'm sympathetic to the patch, but it looks like an ugly hack
>> right now.
> 
>> Since it should clearly only affect behaviour when the new
>> clocksource is explicitly selected...
> 
> Actually the ugliness is because the read_counter function
> and plt_mask are both u32 and I worried that changing them to
> a u64 would be too invasive.
> 
> So here's two possible replacement patches, one with those
> switched to u64 and the other with separate 32- and 64-bit
> read_counter functions.  Both are boot-tested with and
> without clocksource=tsc.

Changeset 18055 is what I had in mind. I split out the
read_counter-is-64-bit portion separately. The changeset needs a bit of
testing and it may need a tweak or two.

 -- Keir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-15 13:05     ` Keir Fraser
@ 2008-07-15 14:44       ` Dan Magenheimer
  2008-07-15 15:08         ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-15 14:44 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

> Changeset 18055 is what I had in mind. I split out the
> read_counter-is-64-bit portion separately. The changeset 
> needs a bit of
> testing and it may need a tweak or two.

Hmmm... One thing I was trying to do with the special
casing in get_s_time() was to avoid using a dynamically
changing scaling (t->tsc_scale).  If I'm not mistaken,
t->tsc_scale is recalculated every EPOCH and thus is
a potential source of stime jitter.  Seems unnecessary
just to make the code look cleaner.

One other nit:  Especially if the above is changed, do
you really prefer a strcmp vs a global variable (or
checking for size == 64) for determining if the tsc
is the platform timer?

Thanks,
Dan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] clocksource=tsc
  2008-07-15 14:44       ` Dan Magenheimer
@ 2008-07-15 15:08         ` Keir Fraser
  2008-07-15 15:46           ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-15 15:08 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

On 15/7/08 15:44, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> Hmmm... One thing I was trying to do with the special
> casing in get_s_time() was to avoid using a dynamically
> changing scaling (t->tsc_scale).  If I'm not mistaken,
> t->tsc_scale is recalculated every EPOCH and thus is
> a potential source of stime jitter.  Seems unnecessary
> just to make the code look cleaner.

My patch disables the per-epoch calibration.

Actually in this mode of operation we hardly need a platform timer *at all*.
The idea is that we let the TSCs free-run, because we know they will behave.
Returning to 32-bit read_counter(), and having NULL read_counter when
clocksource=tsc would be another possibility...

> One other nit:  Especially if the above is changed, do
> you really prefer a strcmp vs a global variable (or
> checking for size == 64) for determining if the tsc
> is the platform timer?

It's hidden in a macro and it's called very rarely. So I think it's fine.

 -- Keir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-15 15:08         ` Keir Fraser
@ 2008-07-15 15:46           ` Dan Magenheimer
  2008-07-15 16:04             ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-15 15:46 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

> > Hmmm... One thing I was trying to do with the special
> > casing in get_s_time() was to avoid using a dynamically
> > changing scaling (t->tsc_scale).  If I'm not mistaken,
> > t->tsc_scale is recalculated every EPOCH and thus is
> > a potential source of stime jitter.  Seems unnecessary
> > just to make the code look cleaner.
> 
> My patch disables the per-epoch calibration.

OK, then I think it is the calls to calibrate_tsc_ap() that
result in different values of tsc_scale (potentially a different
one for every processor depending on the precision of the
calibration).  I still think it would be clearer and safer
to always use a fixed tsc_scale value.

> Actually in this mode of operation we hardly need a platform 
> timer *at all*.
> The idea is that we let the TSCs free-run, because we know 
> they will behave.
> Returning to 32-bit read_counter(), and having NULL read_counter when
> clocksource=tsc would be another possibility...

That's essentially what the original tscstable.patch did, though
I was perhaps much uglier in the miscellaneous parts.

Thanks,
Dan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-15 15:46           ` Dan Magenheimer
@ 2008-07-15 16:04             ` Dan Magenheimer
  2008-07-16  1:15               ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-15 16:04 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

Hmmm... 18055 also fails to boot on my machine.

Could we perhaps fall back to my original patch and do
cleanup later/separately?  I also want to try implementing
an hpet64-based get_s_time() so will be working more
in this code later... but want to get clocksource=tsc
working now with minimal code impact given the freeze.

> -----Original Message-----
> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> Sent: Tuesday, July 15, 2008 9:46 AM
> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> Cc: 'Dave Winchell'
> Subject: RE: [PATCH] clocksource=tsc
> 
> > Actually in this mode of operation we hardly need a platform 
> > timer *at all*.
> > The idea is that we let the TSCs free-run, because we know 
> > they will behave.
> > Returning to 32-bit read_counter(), and having NULL 
> read_counter when
> > clocksource=tsc would be another possibility...
> 
> That's essentially what the original tscstable.patch did, though
> I was perhaps much uglier in the miscellaneous parts.
> 
> Thanks,
> Dan
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-15 16:04             ` Dan Magenheimer
@ 2008-07-16  1:15               ` Dan Magenheimer
  2008-07-16  4:11                 ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-16  1:15 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

[-- Attachment #1: Type: text/plain, Size: 1975 bytes --]

> > > Returning to 32-bit read_counter(), and having NULL 
> > read_counter when
> > > clocksource=tsc would be another possibility...

Well I hacked on 18055 for awhile and just couldn't get it
to boot.  I think local_time_calibration() (and thus
init_percpu_time()) is necessary for boot, though I'm not really
sure why.  Possibly the "Weirdness can happen..." comment in
that routine?

Anyway, this patch (on top of 18055) DOES work, returns to the
32-bit read_counter, and re-enables local_time_calibration().
I'd suggest putting off more major surgery for another day.

Thanks,
Dan

> -----Original Message-----
> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> Sent: Tuesday, July 15, 2008 10:04 AM
> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
> Cc: Dave Winchell
> Subject: RE: [PATCH] clocksource=tsc
> 
> 
> Hmmm... 18055 also fails to boot on my machine.
> 
> Could we perhaps fall back to my original patch and do
> cleanup later/separately?  I also want to try implementing
> an hpet64-based get_s_time() so will be working more
> in this code later... but want to get clocksource=tsc
> working now with minimal code impact given the freeze.
> 
> > -----Original Message-----
> > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> > Sent: Tuesday, July 15, 2008 9:46 AM
> > To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> > Cc: 'Dave Winchell'
> > Subject: RE: [PATCH] clocksource=tsc
> > 
> > > Actually in this mode of operation we hardly need a platform 
> > > timer *at all*.
> > > The idea is that we let the TSCs free-run, because we know 
> > > they will behave.
> > > Returning to 32-bit read_counter(), and having NULL 
> > read_counter when
> > > clocksource=tsc would be another possibility...
> > 
> > That's essentially what the original tscstable.patch did, though
> > I was perhaps much uglier in the miscellaneous parts.
> > 
> > Thanks,
> > Dan
> >
> 
>

[-- Attachment #2: tscstable6.patch --]
[-- Type: application/octet-stream, Size: 6950 bytes --]

diff -r 675fb031df88 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Jul 15 16:13:46 2008 +0100
+++ b/xen/arch/x86/time.c	Tue Jul 15 19:14:16 2008 -0600
@@ -54,14 +54,14 @@ struct cpu_time {
     s_time_t stime_local_stamp;
     s_time_t stime_master_stamp;
     struct time_scale tsc_scale;
-    u64 cstate_plt_count_stamp;
+    u32 cstate_plt_count_stamp;
     struct timer calibration_timer;
 };
 
 struct platform_timesource {
     char *name;
     u64 frequency;
-    u64 (*read_counter)(void);
+    u32 (*read_counter)(void);
     int counter_bits;
 };
 
@@ -340,7 +340,7 @@ static char *freq_string(u64 freq)
  * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
  */
 
-static u64 read_pit_count(void)
+static u32 read_pit_count(void)
 {
     u16 count16;
     u32 count32;
@@ -372,7 +372,7 @@ static void init_pit(struct platform_tim
  * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
  */
 
-static u64 read_hpet_count(void)
+static u32 read_hpet_count(void)
 {
     return hpet_read32(HPET_COUNTER);
 }
@@ -412,7 +412,7 @@ int use_cyclone;
 /* Cyclone MPMC0 register. */
 static volatile u32 *cyclone_timer;
 
-static u64 read_cyclone_count(void)
+static u32 read_cyclone_count(void)
 {
     return *cyclone_timer;
 }
@@ -462,7 +462,7 @@ u32 pmtmr_ioport;
 /* ACPI PM timer ticks at 3.579545 MHz. */
 #define ACPI_PM_FREQUENCY 3579545
 
-static u64 read_pmtimer_count(void)
+static u32 read_pmtimer_count(void)
 {
     return inl(pmtmr_ioport);
 }
@@ -484,22 +484,15 @@ static int init_pmtimer(struct platform_
  * PLATFORM TIMER 5: TSC
  */
 
-#define platform_timer_is_tsc() (!strcmp(plt_src.name, "TSC"))
+#define platform_timer_is_tsc() (plt_src.counter_bits == 64)
 static u64 tsc_freq;
-
-static u64 read_tsc_count(void)
-{
-    u64 tsc;
-    rdtscll(tsc);
-    return tsc;
-}
 
 static int init_tsctimer(struct platform_timesource *pts)
 {
     /* TODO: evaluate stability of TSC here, return 0 if not stable. */
     pts->name = "TSC";
     pts->frequency = tsc_freq;
-    pts->read_counter = read_tsc_count;
+    pts->read_counter = 0;  /* unused as of now */
     pts->counter_bits = 64;
     return 1;
 }
@@ -518,13 +511,14 @@ static s_time_t stime_platform_stamp; /*
 static s_time_t stime_platform_stamp; /* System time at below platform time */
 static u64 platform_timer_stamp;      /* Platform time at above system time */
 static u64 plt_stamp64;          /* 64-bit platform counter stamp           */
-static u64 plt_stamp;            /* hardware-width platform counter stamp   */
+static u32 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
 static void plt_overflow(void *unused)
 {
-    u64 count;
+    u32 count;
 
+    ASSERT(plt_src.counter_bits != 64);
     spin_lock(&platform_timer_lock);
     count = plt_src.read_counter();
     plt_stamp64 += (count - plt_stamp) & plt_mask;
@@ -536,16 +530,28 @@ static void plt_overflow(void *unused)
 
 static s_time_t __read_platform_stime(u64 platform_time)
 {
-    u64 diff = platform_time - platform_timer_stamp;
+    u64 diff, tsc;
+
+    if ( platform_timer_is_tsc() )
+    {
+        rdtscll(tsc);
+        return scale_delta(tsc, &plt_scale);
+    }
+    diff = platform_time - platform_timer_stamp;
     ASSERT(spin_is_locked(&platform_timer_lock));
     return (stime_platform_stamp + scale_delta(diff, &plt_scale));
 }
 
 static s_time_t read_platform_stime(void)
 {
-    u64 count;
+    u64 count, tsc;
     s_time_t stime;
 
+    if ( platform_timer_is_tsc() )
+    {
+        rdtscll(tsc);
+        return scale_delta(tsc, &plt_scale);
+    }
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stime = __read_platform_stime(count);
@@ -559,6 +565,8 @@ static void platform_time_calibration(vo
     u64 count;
     s_time_t stamp;
 
+    if ( platform_timer_is_tsc() )
+        return;
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stamp = __read_platform_stime(count);
@@ -569,6 +577,8 @@ static void platform_time_calibration(vo
 
 static void resume_platform_timer(void)
 {
+    if ( platform_timer_is_tsc() )
+        return;
     /* No change in platform_stime across suspend/resume. */
     platform_timer_stamp = plt_stamp64;
     plt_stamp = plt_src.read_counter();
@@ -604,16 +614,20 @@ static void init_platform_timer(void)
          !init_pmtimer(pts) )
         init_pit(pts);
 
-    plt_mask = (u64)~0ull >> (64 - pts->counter_bits);
-
     set_time_scale(&plt_scale, pts->frequency);
 
-    plt_overflow_period = scale_delta(
-        1ull << (pts->counter_bits-1), &plt_scale);
-    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
-    plt_overflow(NULL);
+    if ( !platform_timer_is_tsc() )
+    {
+        plt_mask = (u32)~0u >> (32 - pts->counter_bits);
 
-    platform_timer_stamp = plt_stamp64;
+        plt_overflow_period = scale_delta(
+            1ull << (pts->counter_bits-1), &plt_scale);
+        init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+        plt_overflow(NULL);
+
+        platform_timer_stamp = plt_stamp64;
+    }
+
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
@@ -633,9 +647,10 @@ void cstate_restore_tsc(void)
 void cstate_restore_tsc(void)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
-    u64 plt_count_delta, tsc_delta;
+    u32    plt_count_delta;
+    u64    tsc_delta;
 
-    if ( tsc_invariant )
+    if ( tsc_invariant || platform_timer_is_tsc() )
         return;
 
     plt_count_delta = (plt_src.read_counter() -
@@ -736,14 +751,18 @@ static unsigned long get_cmos_time(void)
 
 s_time_t get_s_time(void)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    struct cpu_time *t;
     u64 tsc, delta;
     s_time_t now;
 
     rdtscll(tsc);
-    delta = tsc - t->local_tsc_stamp;
-    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
-
+    if ( platform_timer_is_tsc() )
+        now = scale_delta(tsc, &plt_scale);
+    else {
+        t = &this_cpu(cpu_time);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+    }
     return now;
 }
 
@@ -1008,12 +1027,9 @@ void init_percpu_time(void)
     unsigned long flags;
     s_time_t now;
 
-    if ( platform_timer_is_tsc() )
-        return;
-
     local_irq_save(flags);
     rdtscll(t->local_tsc_stamp);
-    now = read_platform_stime();
+    now = !plt_src.read_counter ? 0 : read_platform_stime();
     local_irq_restore(flags);
 
     t->stime_master_stamp = now;
@@ -1031,10 +1047,10 @@ int __init init_xen_time(void)
 
     local_irq_disable();
 
+    init_percpu_time();
+
     stime_platform_stamp = 0;
     init_platform_timer();
-
-    init_percpu_time();
 
     /* check if TSC is invariant during deep C state
        this is a new feature introduced by Nehalem*/

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-16  1:15               ` Dan Magenheimer
@ 2008-07-16  4:11                 ` Dan Magenheimer
  2008-07-16 12:43                   ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-16  4:11 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

OK, ignore that.  It looks like you have it all fixed
in 18060.  I tried it and it now boots.  Thanks!

> -----Original Message-----
> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> Sent: Tuesday, July 15, 2008 7:16 PM
> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel (E-mail)'
> Cc: 'Dave Winchell'
> Subject: RE: [PATCH] clocksource=tsc
> 
> 
> > > > Returning to 32-bit read_counter(), and having NULL 
> > > read_counter when
> > > > clocksource=tsc would be another possibility...
> 
> Well I hacked on 18055 for awhile and just couldn't get it
> to boot.  I think local_time_calibration() (and thus
> init_percpu_time()) is necessary for boot, though I'm not really
> sure why.  Possibly the "Weirdness can happen..." comment in
> that routine?
> 
> Anyway, this patch (on top of 18055) DOES work, returns to the
> 32-bit read_counter, and re-enables local_time_calibration().
> I'd suggest putting off more major surgery for another day.
> 
> Thanks,
> Dan
> 
> > -----Original Message-----
> > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> > Sent: Tuesday, July 15, 2008 10:04 AM
> > To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
> > Cc: Dave Winchell
> > Subject: RE: [PATCH] clocksource=tsc
> > 
> > 
> > Hmmm... 18055 also fails to boot on my machine.
> > 
> > Could we perhaps fall back to my original patch and do
> > cleanup later/separately?  I also want to try implementing
> > an hpet64-based get_s_time() so will be working more
> > in this code later... but want to get clocksource=tsc
> > working now with minimal code impact given the freeze.
> > 
> > > -----Original Message-----
> > > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> > > Sent: Tuesday, July 15, 2008 9:46 AM
> > > To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> > > Cc: 'Dave Winchell'
> > > Subject: RE: [PATCH] clocksource=tsc
> > > 
> > > > Actually in this mode of operation we hardly need a platform 
> > > > timer *at all*.
> > > > The idea is that we let the TSCs free-run, because we know 
> > > > they will behave.
> > > > Returning to 32-bit read_counter(), and having NULL 
> > > read_counter when
> > > > clocksource=tsc would be another possibility...
> > > 
> > > That's essentially what the original tscstable.patch did, though
> > > I was perhaps much uglier in the miscellaneous parts.
> > > 
> > > Thanks,
> > > Dan
> > >
> > 
> >

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-16  4:11                 ` Dan Magenheimer
@ 2008-07-16 12:43                   ` Dan Magenheimer
  2008-07-16 12:49                     ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-16 12:43 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

Well now I have to take that back.  It DOESN'T work yet.
I think I am experiencing "Weirdness can happen..."
when booting with clocksource=tsc... I was away from
the machine overnight but the symptoms I've seen before
are that the system becomes less snappy and eventually
grinds to a near-halt.

Oddly, I can login most of the way on the console
and launch new xterm's in my VNC display, but I never
get a prompt, and I can't interrupt a process I left
running overnight in another xterm.  The time display
in gnome seems to have frozen about an hour after
I booted.  Pinging the machine works but ssh'ing to
it doesn't ("Connection closed")

> -----Original Message-----
> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> Sent: Tuesday, July 15, 2008 10:12 PM
> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel (E-mail)'
> Cc: 'Dave Winchell'
> Subject: RE: [PATCH] clocksource=tsc
> 
> 
> OK, ignore that.  It looks like you have it all fixed
> in 18060.  I tried it and it now boots.  Thanks!
> 
> > -----Original Message-----
> > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> > Sent: Tuesday, July 15, 2008 7:16 PM
> > To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel 
> (E-mail)'
> > Cc: 'Dave Winchell'
> > Subject: RE: [PATCH] clocksource=tsc
> > 
> > 
> > > > > Returning to 32-bit read_counter(), and having NULL 
> > > > read_counter when
> > > > > clocksource=tsc would be another possibility...
> > 
> > Well I hacked on 18055 for awhile and just couldn't get it
> > to boot.  I think local_time_calibration() (and thus
> > init_percpu_time()) is necessary for boot, though I'm not really
> > sure why.  Possibly the "Weirdness can happen..." comment in
> > that routine?
> > 
> > Anyway, this patch (on top of 18055) DOES work, returns to the
> > 32-bit read_counter, and re-enables local_time_calibration().
> > I'd suggest putting off more major surgery for another day.
> > 
> > Thanks,
> > Dan
> > 
> > > -----Original Message-----
> > > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> > > Sent: Tuesday, July 15, 2008 10:04 AM
> > > To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
> > > Cc: Dave Winchell
> > > Subject: RE: [PATCH] clocksource=tsc
> > > 
> > > 
> > > Hmmm... 18055 also fails to boot on my machine.
> > > 
> > > Could we perhaps fall back to my original patch and do
> > > cleanup later/separately?  I also want to try implementing
> > > an hpet64-based get_s_time() so will be working more
> > > in this code later... but want to get clocksource=tsc
> > > working now with minimal code impact given the freeze.
> > > 
> > > > -----Original Message-----
> > > > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> > > > Sent: Tuesday, July 15, 2008 9:46 AM
> > > > To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> > > > Cc: 'Dave Winchell'
> > > > Subject: RE: [PATCH] clocksource=tsc
> > > > 
> > > > > Actually in this mode of operation we hardly need a platform 
> > > > > timer *at all*.
> > > > > The idea is that we let the TSCs free-run, because we know 
> > > > > they will behave.
> > > > > Returning to 32-bit read_counter(), and having NULL 
> > > > read_counter when
> > > > > clocksource=tsc would be another possibility...
> > > > 
> > > > That's essentially what the original tscstable.patch did, though
> > > > I was perhaps much uglier in the miscellaneous parts.
> > > > 
> > > > Thanks,
> > > > Dan
> > > >
> > > 
> > >

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] clocksource=tsc
  2008-07-16 12:43                   ` Dan Magenheimer
@ 2008-07-16 12:49                     ` Keir Fraser
  2008-07-16 13:43                       ` Dan Magenheimer
  2008-07-17 23:05                       ` Dan Magenheimer
  0 siblings, 2 replies; 28+ messages in thread
From: Keir Fraser @ 2008-07-16 12:49 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

That's a weird set of symptoms. Perhaps dom0's sense of system time is
diverging from Xen's? I don't see that CPUs can diverge, if their TSCs are
in sync, since we shouldn't be dynamically modifying the per-CPU timestamps
or scale factors.

 -- Keir

On 16/7/08 13:43, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> Well now I have to take that back.  It DOESN'T work yet.
> I think I am experiencing "Weirdness can happen..."
> when booting with clocksource=tsc... I was away from
> the machine overnight but the symptoms I've seen before
> are that the system becomes less snappy and eventually
> grinds to a near-halt.
> 
> Oddly, I can login most of the way on the console
> and launch new xterm's in my VNC display, but I never
> get a prompt, and I can't interrupt a process I left
> running overnight in another xterm.  The time display
> in gnome seems to have frozen about an hour after
> I booted.  Pinging the machine works but ssh'ing to
> it doesn't ("Connection closed")
> 
>> -----Original Message-----
>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>> Sent: Tuesday, July 15, 2008 10:12 PM
>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel (E-mail)'
>> Cc: 'Dave Winchell'
>> Subject: RE: [PATCH] clocksource=tsc
>> 
>> 
>> OK, ignore that.  It looks like you have it all fixed
>> in 18060.  I tried it and it now boots.  Thanks!
>> 
>>> -----Original Message-----
>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>> Sent: Tuesday, July 15, 2008 7:16 PM
>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
>> (E-mail)'
>>> Cc: 'Dave Winchell'
>>> Subject: RE: [PATCH] clocksource=tsc
>>> 
>>> 
>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>> read_counter when
>>>>>> clocksource=tsc would be another possibility...
>>> 
>>> Well I hacked on 18055 for awhile and just couldn't get it
>>> to boot.  I think local_time_calibration() (and thus
>>> init_percpu_time()) is necessary for boot, though I'm not really
>>> sure why.  Possibly the "Weirdness can happen..." comment in
>>> that routine?
>>> 
>>> Anyway, this patch (on top of 18055) DOES work, returns to the
>>> 32-bit read_counter, and re-enables local_time_calibration().
>>> I'd suggest putting off more major surgery for another day.
>>> 
>>> Thanks,
>>> Dan
>>> 
>>>> -----Original Message-----
>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>> Sent: Tuesday, July 15, 2008 10:04 AM
>>>> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
>>>> Cc: Dave Winchell
>>>> Subject: RE: [PATCH] clocksource=tsc
>>>> 
>>>> 
>>>> Hmmm... 18055 also fails to boot on my machine.
>>>> 
>>>> Could we perhaps fall back to my original patch and do
>>>> cleanup later/separately?  I also want to try implementing
>>>> an hpet64-based get_s_time() so will be working more
>>>> in this code later... but want to get clocksource=tsc
>>>> working now with minimal code impact given the freeze.
>>>> 
>>>>> -----Original Message-----
>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>> Sent: Tuesday, July 15, 2008 9:46 AM
>>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
>>>>> Cc: 'Dave Winchell'
>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>> 
>>>>>> Actually in this mode of operation we hardly need a platform
>>>>>> timer *at all*.
>>>>>> The idea is that we let the TSCs free-run, because we know
>>>>>> they will behave.
>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>> read_counter when
>>>>>> clocksource=tsc would be another possibility...
>>>>> 
>>>>> That's essentially what the original tscstable.patch did, though
>>>>> I was perhaps much uglier in the miscellaneous parts.
>>>>> 
>>>>> Thanks,
>>>>> Dan
>>>>> 
>>>> 
>>>> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-16 12:49                     ` Keir Fraser
@ 2008-07-16 13:43                       ` Dan Magenheimer
  2008-07-16 15:42                         ` Dan Magenheimer
  2008-07-17 23:05                       ` Dan Magenheimer
  1 sibling, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-16 13:43 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

I'm thinking the problem may only occur when dom0 is
running ntpd.  Maybe "setting" the platform time only
changes one CPU, or sets the platform time differently
than system time?

> -----Original Message-----
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Wednesday, July 16, 2008 6:50 AM
> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
> Cc: Dave Winchell
> Subject: Re: [PATCH] clocksource=tsc
> 
> 
> That's a weird set of symptoms. Perhaps dom0's sense of system time is
> diverging from Xen's? I don't see that CPUs can diverge, if 
> their TSCs are
> in sync, since we shouldn't be dynamically modifying the 
> per-CPU timestamps
> or scale factors.
> 
>  -- Keir
> 
> On 16/7/08 13:43, "Dan Magenheimer" 
> <dan.magenheimer@oracle.com> wrote:
> 
> > Well now I have to take that back.  It DOESN'T work yet.
> > I think I am experiencing "Weirdness can happen..."
> > when booting with clocksource=tsc... I was away from
> > the machine overnight but the symptoms I've seen before
> > are that the system becomes less snappy and eventually
> > grinds to a near-halt.
> >
> > Oddly, I can login most of the way on the console
> > and launch new xterm's in my VNC display, but I never
> > get a prompt, and I can't interrupt a process I left
> > running overnight in another xterm.  The time display
> > in gnome seems to have frozen about an hour after
> > I booted.  Pinging the machine works but ssh'ing to
> > it doesn't ("Connection closed")
> >
> >> -----Original Message-----
> >> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >> Sent: Tuesday, July 15, 2008 10:12 PM
> >> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 
> 'Xen-Devel (E-mail)'
> >> Cc: 'Dave Winchell'
> >> Subject: RE: [PATCH] clocksource=tsc
> >>
> >>
> >> OK, ignore that.  It looks like you have it all fixed
> >> in 18060.  I tried it and it now boots.  Thanks!
> >>
> >>> -----Original Message-----
> >>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>> Sent: Tuesday, July 15, 2008 7:16 PM
> >>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
> >> (E-mail)'
> >>> Cc: 'Dave Winchell'
> >>> Subject: RE: [PATCH] clocksource=tsc
> >>>
> >>>
> >>>>>> Returning to 32-bit read_counter(), and having NULL
> >>>>> read_counter when
> >>>>>> clocksource=tsc would be another possibility...
> >>>
> >>> Well I hacked on 18055 for awhile and just couldn't get it
> >>> to boot.  I think local_time_calibration() (and thus
> >>> init_percpu_time()) is necessary for boot, though I'm not really
> >>> sure why.  Possibly the "Weirdness can happen..." comment in
> >>> that routine?
> >>>
> >>> Anyway, this patch (on top of 18055) DOES work, returns to the
> >>> 32-bit read_counter, and re-enables local_time_calibration().
> >>> I'd suggest putting off more major surgery for another day.
> >>>
> >>> Thanks,
> >>> Dan
> >>>
> >>>> -----Original Message-----
> >>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>> Sent: Tuesday, July 15, 2008 10:04 AM
> >>>> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
> >>>> Cc: Dave Winchell
> >>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>
> >>>>
> >>>> Hmmm... 18055 also fails to boot on my machine.
> >>>>
> >>>> Could we perhaps fall back to my original patch and do
> >>>> cleanup later/separately?  I also want to try implementing
> >>>> an hpet64-based get_s_time() so will be working more
> >>>> in this code later... but want to get clocksource=tsc
> >>>> working now with minimal code impact given the freeze.
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>>> Sent: Tuesday, July 15, 2008 9:46 AM
> >>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> >>>>> Cc: 'Dave Winchell'
> >>>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>>
> >>>>>> Actually in this mode of operation we hardly need a platform
> >>>>>> timer *at all*.
> >>>>>> The idea is that we let the TSCs free-run, because we know
> >>>>>> they will behave.
> >>>>>> Returning to 32-bit read_counter(), and having NULL
> >>>>> read_counter when
> >>>>>> clocksource=tsc would be another possibility...
> >>>>>
> >>>>> That's essentially what the original tscstable.patch did, though
> >>>>> I was perhaps much uglier in the miscellaneous parts.
> >>>>>
> >>>>> Thanks,
> >>>>> Dan
> >>>>>
> >>>>
> >>>>
> >
> 
> 
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-16 13:43                       ` Dan Magenheimer
@ 2008-07-16 15:42                         ` Dan Magenheimer
  2008-07-16 19:32                           ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-16 15:42 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

Nope, its not ntpd.  Everything seemed fine for at
least 15-20 minutes, then I went away for awhile
and when I checked back an hour later, same weird
symptoms as before.

I also tried running with nosmp on the xen boot line
(and confirmed that only one processor was started).
Same results... after about an hour.  So it doesn't
appear to be a result of cross-CPU time issues.

> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> 
> I'm thinking the problem may only occur when dom0 is
> running ntpd.  Maybe "setting" the platform time only
> changes one CPU, or sets the platform time differently
> than system time?
> 
> > -----Original Message-----
> > From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> > Sent: Wednesday, July 16, 2008 6:50 AM
> > To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
> > Cc: Dave Winchell
> > Subject: Re: [PATCH] clocksource=tsc
> > 
> > 
> > That's a weird set of symptoms. Perhaps dom0's sense of 
> system time is
> > diverging from Xen's? I don't see that CPUs can diverge, if 
> > their TSCs are
> > in sync, since we shouldn't be dynamically modifying the 
> > per-CPU timestamps
> > or scale factors.
> > 
> >  -- Keir
> > 
> > On 16/7/08 13:43, "Dan Magenheimer" 
> > <dan.magenheimer@oracle.com> wrote:
> > 
> > > Well now I have to take that back.  It DOESN'T work yet.
> > > I think I am experiencing "Weirdness can happen..."
> > > when booting with clocksource=tsc... I was away from
> > > the machine overnight but the symptoms I've seen before
> > > are that the system becomes less snappy and eventually
> > > grinds to a near-halt.
> > >
> > > Oddly, I can login most of the way on the console
> > > and launch new xterm's in my VNC display, but I never
> > > get a prompt, and I can't interrupt a process I left
> > > running overnight in another xterm.  The time display
> > > in gnome seems to have frozen about an hour after
> > > I booted.  Pinging the machine works but ssh'ing to
> > > it doesn't ("Connection closed")

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] clocksource=tsc
  2008-07-16 15:42                         ` Dan Magenheimer
@ 2008-07-16 19:32                           ` Keir Fraser
  0 siblings, 0 replies; 28+ messages in thread
From: Keir Fraser @ 2008-07-16 19:32 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

My best guess is still that Xen's calculation of time is diverging from dom0
kernel's. For some reason that is unclear, since of course the calibration
info should be statically determined at boot and never change from there on.

 -- Keir

On 16/7/08 16:42, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> Nope, its not ntpd.  Everything seemed fine for at
> least 15-20 minutes, then I went away for awhile
> and when I checked back an hour later, same weird
> symptoms as before.
> 
> I also tried running with nosmp on the xen boot line
> (and confirmed that only one processor was started).
> Same results... after about an hour.  So it doesn't
> appear to be a result of cross-CPU time issues.
> 
>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>> 
>> I'm thinking the problem may only occur when dom0 is
>> running ntpd.  Maybe "setting" the platform time only
>> changes one CPU, or sets the platform time differently
>> than system time?
>> 
>>> -----Original Message-----
>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>> Sent: Wednesday, July 16, 2008 6:50 AM
>>> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
>>> Cc: Dave Winchell
>>> Subject: Re: [PATCH] clocksource=tsc
>>> 
>>> 
>>> That's a weird set of symptoms. Perhaps dom0's sense of
>> system time is
>>> diverging from Xen's? I don't see that CPUs can diverge, if
>>> their TSCs are
>>> in sync, since we shouldn't be dynamically modifying the
>>> per-CPU timestamps
>>> or scale factors.
>>> 
>>>  -- Keir
>>> 
>>> On 16/7/08 13:43, "Dan Magenheimer"
>>> <dan.magenheimer@oracle.com> wrote:
>>> 
>>>> Well now I have to take that back.  It DOESN'T work yet.
>>>> I think I am experiencing "Weirdness can happen..."
>>>> when booting with clocksource=tsc... I was away from
>>>> the machine overnight but the symptoms I've seen before
>>>> are that the system becomes less snappy and eventually
>>>> grinds to a near-halt.
>>>> 
>>>> Oddly, I can login most of the way on the console
>>>> and launch new xterm's in my VNC display, but I never
>>>> get a prompt, and I can't interrupt a process I left
>>>> running overnight in another xterm.  The time display
>>>> in gnome seems to have frozen about an hour after
>>>> I booted.  Pinging the machine works but ssh'ing to
>>>> it doesn't ("Connection closed")
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] clocksource=tsc
  2008-07-16 12:49                     ` Keir Fraser
  2008-07-16 13:43                       ` Dan Magenheimer
@ 2008-07-17 23:05                       ` Dan Magenheimer
  2008-07-18  7:24                         ` Keir Fraser
  1 sibling, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-17 23:05 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

[-- Attachment #1: Type: text/plain, Size: 4673 bytes --]

After trying lots of things, I fell back to my known
working version of time.c and adapted it forward to
match tip.  The attached patch (on top of 18063) boots
clocksource=tsc and doesn't display the weirdness for
2-1/2 hours of testing (always showed up around 1 hour before).

It may not be elegant but it works and tip doesn't (with
clocksource=tsc).

Dan

> -----Original Message-----
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Wednesday, July 16, 2008 6:50 AM
> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
> Cc: Dave Winchell
> Subject: Re: [PATCH] clocksource=tsc
> 
> 
> That's a weird set of symptoms. Perhaps dom0's sense of system time is
> diverging from Xen's? I don't see that CPUs can diverge, if 
> their TSCs are
> in sync, since we shouldn't be dynamically modifying the 
> per-CPU timestamps
> or scale factors.
> 
>  -- Keir
> 
> On 16/7/08 13:43, "Dan Magenheimer" 
> <dan.magenheimer@oracle.com> wrote:
> 
> > Well now I have to take that back.  It DOESN'T work yet.
> > I think I am experiencing "Weirdness can happen..."
> > when booting with clocksource=tsc... I was away from
> > the machine overnight but the symptoms I've seen before
> > are that the system becomes less snappy and eventually
> > grinds to a near-halt.
> >
> > Oddly, I can login most of the way on the console
> > and launch new xterm's in my VNC display, but I never
> > get a prompt, and I can't interrupt a process I left
> > running overnight in another xterm.  The time display
> > in gnome seems to have frozen about an hour after
> > I booted.  Pinging the machine works but ssh'ing to
> > it doesn't ("Connection closed")
> >
> >> -----Original Message-----
> >> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >> Sent: Tuesday, July 15, 2008 10:12 PM
> >> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 
> 'Xen-Devel (E-mail)'
> >> Cc: 'Dave Winchell'
> >> Subject: RE: [PATCH] clocksource=tsc
> >>
> >>
> >> OK, ignore that.  It looks like you have it all fixed
> >> in 18060.  I tried it and it now boots.  Thanks!
> >>
> >>> -----Original Message-----
> >>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>> Sent: Tuesday, July 15, 2008 7:16 PM
> >>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
> >> (E-mail)'
> >>> Cc: 'Dave Winchell'
> >>> Subject: RE: [PATCH] clocksource=tsc
> >>>
> >>>
> >>>>>> Returning to 32-bit read_counter(), and having NULL
> >>>>> read_counter when
> >>>>>> clocksource=tsc would be another possibility...
> >>>
> >>> Well I hacked on 18055 for awhile and just couldn't get it
> >>> to boot.  I think local_time_calibration() (and thus
> >>> init_percpu_time()) is necessary for boot, though I'm not really
> >>> sure why.  Possibly the "Weirdness can happen..." comment in
> >>> that routine?
> >>>
> >>> Anyway, this patch (on top of 18055) DOES work, returns to the
> >>> 32-bit read_counter, and re-enables local_time_calibration().
> >>> I'd suggest putting off more major surgery for another day.
> >>>
> >>> Thanks,
> >>> Dan
> >>>
> >>>> -----Original Message-----
> >>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>> Sent: Tuesday, July 15, 2008 10:04 AM
> >>>> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
> >>>> Cc: Dave Winchell
> >>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>
> >>>>
> >>>> Hmmm... 18055 also fails to boot on my machine.
> >>>>
> >>>> Could we perhaps fall back to my original patch and do
> >>>> cleanup later/separately?  I also want to try implementing
> >>>> an hpet64-based get_s_time() so will be working more
> >>>> in this code later... but want to get clocksource=tsc
> >>>> working now with minimal code impact given the freeze.
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>>> Sent: Tuesday, July 15, 2008 9:46 AM
> >>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> >>>>> Cc: 'Dave Winchell'
> >>>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>>
> >>>>>> Actually in this mode of operation we hardly need a platform
> >>>>>> timer *at all*.
> >>>>>> The idea is that we let the TSCs free-run, because we know
> >>>>>> they will behave.
> >>>>>> Returning to 32-bit read_counter(), and having NULL
> >>>>> read_counter when
> >>>>>> clocksource=tsc would be another possibility...
> >>>>>
> >>>>> That's essentially what the original tscstable.patch did, though
> >>>>> I was perhaps much uglier in the miscellaneous parts.
> >>>>>
> >>>>> Thanks,
> >>>>> Dan
> >>>>>
> >>>>
> >>>>
> >
> 
> 
>

[-- Attachment #2: tscdebug7.patch --]
[-- Type: application/octet-stream, Size: 4947 bytes --]

diff -r ea6a9793928d xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Jul 15 18:00:13 2008 +0100
+++ b/xen/arch/x86/time.c	Thu Jul 17 14:34:24 2008 -0600
@@ -484,7 +484,7 @@ static int init_pmtimer(struct platform_
  * PLATFORM TIMER 5: TSC
  */
 
-#define platform_timer_is_tsc() (!strcmp(plt_src.name, "TSC"))
+#define platform_timer_is_tsc() (plt_src.counter_bits == 64)
 static u64 tsc_freq;
 
 static u64 read_tsc_count(void)
@@ -496,22 +496,7 @@ static u64 read_tsc_count(void)
 
 static int init_tsctimer(struct platform_timesource *pts)
 {
-    unsigned int cpu;
-
-    /*
-     * TODO: evaluate stability of TSC here, return 0 if not stable.
-     * For now we assume all TSCs are synchronised and hence can all share
-     * CPU 0's calibration values.
-     */
-    for_each_cpu ( cpu )
-    {
-        if ( cpu == 0 )
-            continue;
-        memcpy(&per_cpu(cpu_time, cpu),
-               &per_cpu(cpu_time, 0),
-               sizeof(struct cpu_time));
-    }
-
+    /* TODO: evaluate stability of TSC here, return 0 if not stable */
     pts->name = "TSC";
     pts->frequency = tsc_freq;
     pts->read_counter = read_tsc_count;
@@ -541,6 +526,7 @@ static void plt_overflow(void *unused)
 {
     u64 count;
 
+    ASSERT(plt_src.counter_bits <= 32);
     spin_lock(&platform_timer_lock);
     count = plt_src.read_counter();
     plt_stamp64 += (count - plt_stamp) & plt_mask;
@@ -575,6 +561,8 @@ static void platform_time_calibration(vo
     u64 count;
     s_time_t stamp;
 
+    if ( plt_src.counter_bits == 64 )
+        return;
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stamp = __read_platform_stime(count);
@@ -585,6 +573,8 @@ static void platform_time_calibration(vo
 
 static void resume_platform_timer(void)
 {
+    if ( plt_src.counter_bits == 64 )
+        return;
     /* No change in platform_stime across suspend/resume. */
     platform_timer_stamp = plt_stamp64;
     plt_stamp = plt_src.read_counter();
@@ -624,12 +614,21 @@ static void init_platform_timer(void)
 
     set_time_scale(&plt_scale, pts->frequency);
 
-    plt_overflow_period = scale_delta(
-        1ull << (pts->counter_bits-1), &plt_scale);
-    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
-    plt_overflow(NULL);
+    if (pts->counter_bits != 64 )
+    {
+        plt_overflow_period = scale_delta(
+            1ull << (pts->counter_bits-1), &plt_scale);
+        init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+        plt_overflow(NULL);
 
-    platform_timer_stamp = plt_stamp64;
+        platform_timer_stamp = plt_stamp64;
+    }
+    else
+    {
+        platform_timer_stamp =  stime_platform_stamp = 0;
+        plt_stamp = plt_stamp64 = 0;
+    }
+
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
@@ -752,14 +751,19 @@ static unsigned long get_cmos_time(void)
 
 s_time_t get_s_time(void)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    struct cpu_time *t;
     u64 tsc, delta;
     s_time_t now;
 
     rdtscll(tsc);
-    delta = tsc - t->local_tsc_stamp;
-    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
-
+    if (platform_timer_is_tsc() )
+        now = scale_delta(tsc, &plt_scale);
+    else
+    {
+        t = &this_cpu(cpu_time);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+    }
     return now;
 }
 
@@ -821,10 +825,6 @@ int cpu_frequency_change(u64 freq)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     u64 curr_tsc;
-
-    /* Nothing to do if TSC is platform timer. Assume it is constant-rate. */
-    if ( platform_timer_is_tsc() )
-        return 0;
 
     /* Sanity check: CPU frequency allegedly dropping below 1MHz? */
     if ( freq < 1000000u )
@@ -1024,12 +1024,9 @@ void init_percpu_time(void)
     unsigned long flags;
     s_time_t now;
 
-    if ( platform_timer_is_tsc() )
-        return;
-
     local_irq_save(flags);
     rdtscll(t->local_tsc_stamp);
-    now = read_platform_stime();
+    now = ( !plt_src.read_counter ) ? 0 : read_platform_stime();
     local_irq_restore(flags);
 
     t->stime_master_stamp = now;
@@ -1047,10 +1044,10 @@ int __init init_xen_time(void)
 
     local_irq_disable();
 
+    init_percpu_time();
+
     stime_platform_stamp = 0;
     init_platform_timer();
-
-    init_percpu_time();
 
     /* check if TSC is invariant during deep C state
        this is a new feature introduced by Nehalem*/
@@ -1146,12 +1143,11 @@ int time_suspend(void)
 
 int time_resume(void)
 {
-    /*u64 tmp = */init_pit_and_calibrate_tsc();
+    u64 tmp = init_pit_and_calibrate_tsc();
 
     disable_pit_irq();
 
-    /* Disable this while calibrate_tsc_ap() also is skipped. */
-    /*set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);*/
+    set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
 
     resume_platform_timer();
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: RE: [PATCH] clocksource=tsc
  2008-07-15  0:35     ` Tian, Kevin
@ 2008-07-17 23:47       ` Dan Magenheimer
  0 siblings, 0 replies; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-17 23:47 UTC (permalink / raw)
  To: Tian, Kevin, Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

[-- Attachment #1: Type: text/plain, Size: 2030 bytes --]

> Boot option is good enough for now, but finally I guess you may
> need some type of dynamic clocksource switch mechanism,
> just like what Linux does today. User may be not sure of the
> specific platform. Some features may affect TSC stabilitity, e.g.
> Px may have freq change and Cx may have TSC stop on some
> platform. On those path a mark_tsc_unstable step is required
> to switch current clocksource to other available platfrom timer
> on the fly. So besides the boot option, Xen itself needs to code
> such condition checks for TSC stability. :-)

Yes, agreed.

I think there are "good tsc" machines where TSC will never
skew, "bad tsc" machines where the skew is apparent at
boot, and "grey tsc" machines where there is skew but
the skew happens to be small at boot but may grow
to be bad post-boot possibly due to Px/Cx.  In order
to handle all of these here's the algorithm that I'm
thinking of:

1) Use processor bits (borrowing code from a recent Linux
   version) to determine whether a system is likely to
   be "good tsc" or "bad tsc".  Set the tsc_invariant
   global variable accordingly.
2) When synchronize_tsc_bp()/ap() dynamically evaluates
   skew, change tsc_invariant if appropriate.
3) If tsc_invariant is set when clocksource is being
   selected, tsc should be the default clocksource,
   unless overridden by clocksource= on the boot line
   OR a new boot parameter "notsc".
4) Write a pair of routines equivalent to
   synchronize_tsc_bp/ap() but which
   just returns whether or not TSCs are sync'ed.  Call this
   routine whenever a processor exits from Cx/Px and
   also on a decaying counter, e.g. 1 second after boot,
   then 2 seconds after that, then 4 seconds after that,
   etc.  If skew is detected, change the clocksource
   to the next best and printk the change.
5) I don't know if it is currently "safe" to change
   clocksources after the initial selection in
   init_platform_timer() so this may take some work.

Comments?

Thanks,
Dan

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] clocksource=tsc
  2008-07-17 23:05                       ` Dan Magenheimer
@ 2008-07-18  7:24                         ` Keir Fraser
  2008-07-18 11:01                           ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-18  7:24 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

You shouldn't have to modify get_s_time(). Guests will still be running the
old algorithm (sys_stamp + (now_tsc - stamp_tsc) * scale_tsc), so existing
get_s_time() ought to work. In fact it must work.

 -- Keir

On 18/7/08 00:05, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> After trying lots of things, I fell back to my known
> working version of time.c and adapted it forward to
> match tip.  The attached patch (on top of 18063) boots
> clocksource=tsc and doesn't display the weirdness for
> 2-1/2 hours of testing (always showed up around 1 hour before).
> 
> It may not be elegant but it works and tip doesn't (with
> clocksource=tsc).
> 
> Dan
> 
>> -----Original Message-----
>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>> Sent: Wednesday, July 16, 2008 6:50 AM
>> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
>> Cc: Dave Winchell
>> Subject: Re: [PATCH] clocksource=tsc
>> 
>> 
>> That's a weird set of symptoms. Perhaps dom0's sense of system time is
>> diverging from Xen's? I don't see that CPUs can diverge, if
>> their TSCs are
>> in sync, since we shouldn't be dynamically modifying the
>> per-CPU timestamps
>> or scale factors.
>> 
>>  -- Keir
>> 
>> On 16/7/08 13:43, "Dan Magenheimer"
>> <dan.magenheimer@oracle.com> wrote:
>> 
>>> Well now I have to take that back.  It DOESN'T work yet.
>>> I think I am experiencing "Weirdness can happen..."
>>> when booting with clocksource=tsc... I was away from
>>> the machine overnight but the symptoms I've seen before
>>> are that the system becomes less snappy and eventually
>>> grinds to a near-halt.
>>> 
>>> Oddly, I can login most of the way on the console
>>> and launch new xterm's in my VNC display, but I never
>>> get a prompt, and I can't interrupt a process I left
>>> running overnight in another xterm.  The time display
>>> in gnome seems to have frozen about an hour after
>>> I booted.  Pinging the machine works but ssh'ing to
>>> it doesn't ("Connection closed")
>>> 
>>>> -----Original Message-----
>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>> Sent: Tuesday, July 15, 2008 10:12 PM
>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser';
>> 'Xen-Devel (E-mail)'
>>>> Cc: 'Dave Winchell'
>>>> Subject: RE: [PATCH] clocksource=tsc
>>>> 
>>>> 
>>>> OK, ignore that.  It looks like you have it all fixed
>>>> in 18060.  I tried it and it now boots.  Thanks!
>>>> 
>>>>> -----Original Message-----
>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>> Sent: Tuesday, July 15, 2008 7:16 PM
>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
>>>> (E-mail)'
>>>>> Cc: 'Dave Winchell'
>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>> 
>>>>> 
>>>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>>>> read_counter when
>>>>>>>> clocksource=tsc would be another possibility...
>>>>> 
>>>>> Well I hacked on 18055 for awhile and just couldn't get it
>>>>> to boot.  I think local_time_calibration() (and thus
>>>>> init_percpu_time()) is necessary for boot, though I'm not really
>>>>> sure why.  Possibly the "Weirdness can happen..." comment in
>>>>> that routine?
>>>>> 
>>>>> Anyway, this patch (on top of 18055) DOES work, returns to the
>>>>> 32-bit read_counter, and re-enables local_time_calibration().
>>>>> I'd suggest putting off more major surgery for another day.
>>>>> 
>>>>> Thanks,
>>>>> Dan
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>> Sent: Tuesday, July 15, 2008 10:04 AM
>>>>>> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
>>>>>> Cc: Dave Winchell
>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>> 
>>>>>> 
>>>>>> Hmmm... 18055 also fails to boot on my machine.
>>>>>> 
>>>>>> Could we perhaps fall back to my original patch and do
>>>>>> cleanup later/separately?  I also want to try implementing
>>>>>> an hpet64-based get_s_time() so will be working more
>>>>>> in this code later... but want to get clocksource=tsc
>>>>>> working now with minimal code impact given the freeze.
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>>> Sent: Tuesday, July 15, 2008 9:46 AM
>>>>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
>>>>>>> Cc: 'Dave Winchell'
>>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>>> 
>>>>>>>> Actually in this mode of operation we hardly need a platform
>>>>>>>> timer *at all*.
>>>>>>>> The idea is that we let the TSCs free-run, because we know
>>>>>>>> they will behave.
>>>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>>>> read_counter when
>>>>>>>> clocksource=tsc would be another possibility...
>>>>>>> 
>>>>>>> That's essentially what the original tscstable.patch did, though
>>>>>>> I was perhaps much uglier in the miscellaneous parts.
>>>>>>> 
>>>>>>> Thanks,
>>>>>>> Dan
>>>>>>> 
>>>>>> 
>>>>>> 
>>> 
>> 
>> 
>> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: Re: [PATCH] clocksource=tsc
  2008-07-18  7:24                         ` Keir Fraser
@ 2008-07-18 11:01                           ` Keir Fraser
  2008-07-18 11:10                             ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-18 11:01 UTC (permalink / raw)
  To: Keir Fraser, dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

Okay, I'm pretty sure the reason your patch works is that system time as
calculated in the guest is still being continually calibrated by
local_time_calibration() in Xen. You do not turn off that calibration
function and hence it will continually update the cpu_time structure with
new timestamps and scale factors. Is this really intended?

The reason mine doesn't work is that the time record exposed to the guest
*never* changes. The calculation in the guest is thus effectively:
 sys_time = TSC * scale_factor

The problem is that this is sensitive to small errors in the scale factor!
And unfortunately we compute a new scale factor, to convert to us, as
scale_factor_ns/1000. The resulting scale factor is obviously less accurate,
and leads to an accumulating absolute error as the TSC value gets large.

So, when all is said and done, what are we actually trying to achieve here
and how should it really work? As far as I can see we cannot avoid sending
an updated time record to guests periodically. But running
local_time_calibration() only for guests and not for Xen system time seems
pretty weird to me.

Perhaps we should push this all off until after 3.3?

 -- Keir

On 18/7/08 08:24, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> You shouldn't have to modify get_s_time(). Guests will still be running the
> old algorithm (sys_stamp + (now_tsc - stamp_tsc) * scale_tsc), so existing
> get_s_time() ought to work. In fact it must work.
> 
>  -- Keir
> 
> On 18/7/08 00:05, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:
> 
>> After trying lots of things, I fell back to my known
>> working version of time.c and adapted it forward to
>> match tip.  The attached patch (on top of 18063) boots
>> clocksource=tsc and doesn't display the weirdness for
>> 2-1/2 hours of testing (always showed up around 1 hour before).
>> 
>> It may not be elegant but it works and tip doesn't (with
>> clocksource=tsc).
>> 
>> Dan
>> 
>>> -----Original Message-----
>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>> Sent: Wednesday, July 16, 2008 6:50 AM
>>> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
>>> Cc: Dave Winchell
>>> Subject: Re: [PATCH] clocksource=tsc
>>> 
>>> 
>>> That's a weird set of symptoms. Perhaps dom0's sense of system time is
>>> diverging from Xen's? I don't see that CPUs can diverge, if
>>> their TSCs are
>>> in sync, since we shouldn't be dynamically modifying the
>>> per-CPU timestamps
>>> or scale factors.
>>> 
>>>  -- Keir
>>> 
>>> On 16/7/08 13:43, "Dan Magenheimer"
>>> <dan.magenheimer@oracle.com> wrote:
>>> 
>>>> Well now I have to take that back.  It DOESN'T work yet.
>>>> I think I am experiencing "Weirdness can happen..."
>>>> when booting with clocksource=tsc... I was away from
>>>> the machine overnight but the symptoms I've seen before
>>>> are that the system becomes less snappy and eventually
>>>> grinds to a near-halt.
>>>> 
>>>> Oddly, I can login most of the way on the console
>>>> and launch new xterm's in my VNC display, but I never
>>>> get a prompt, and I can't interrupt a process I left
>>>> running overnight in another xterm.  The time display
>>>> in gnome seems to have frozen about an hour after
>>>> I booted.  Pinging the machine works but ssh'ing to
>>>> it doesn't ("Connection closed")
>>>> 
>>>>> -----Original Message-----
>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>> Sent: Tuesday, July 15, 2008 10:12 PM
>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser';
>>> 'Xen-Devel (E-mail)'
>>>>> Cc: 'Dave Winchell'
>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>> 
>>>>> 
>>>>> OK, ignore that.  It looks like you have it all fixed
>>>>> in 18060.  I tried it and it now boots.  Thanks!
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>> Sent: Tuesday, July 15, 2008 7:16 PM
>>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
>>>>> (E-mail)'
>>>>>> Cc: 'Dave Winchell'
>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>> 
>>>>>> 
>>>>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>>>>> read_counter when
>>>>>>>>> clocksource=tsc would be another possibility...
>>>>>> 
>>>>>> Well I hacked on 18055 for awhile and just couldn't get it
>>>>>> to boot.  I think local_time_calibration() (and thus
>>>>>> init_percpu_time()) is necessary for boot, though I'm not really
>>>>>> sure why.  Possibly the "Weirdness can happen..." comment in
>>>>>> that routine?
>>>>>> 
>>>>>> Anyway, this patch (on top of 18055) DOES work, returns to the
>>>>>> 32-bit read_counter, and re-enables local_time_calibration().
>>>>>> I'd suggest putting off more major surgery for another day.
>>>>>> 
>>>>>> Thanks,
>>>>>> Dan
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>>> Sent: Tuesday, July 15, 2008 10:04 AM
>>>>>>> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
>>>>>>> Cc: Dave Winchell
>>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>>> 
>>>>>>> 
>>>>>>> Hmmm... 18055 also fails to boot on my machine.
>>>>>>> 
>>>>>>> Could we perhaps fall back to my original patch and do
>>>>>>> cleanup later/separately?  I also want to try implementing
>>>>>>> an hpet64-based get_s_time() so will be working more
>>>>>>> in this code later... but want to get clocksource=tsc
>>>>>>> working now with minimal code impact given the freeze.
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>>>> Sent: Tuesday, July 15, 2008 9:46 AM
>>>>>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
>>>>>>>> Cc: 'Dave Winchell'
>>>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>>>> 
>>>>>>>>> Actually in this mode of operation we hardly need a platform
>>>>>>>>> timer *at all*.
>>>>>>>>> The idea is that we let the TSCs free-run, because we know
>>>>>>>>> they will behave.
>>>>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>>>>> read_counter when
>>>>>>>>> clocksource=tsc would be another possibility...
>>>>>>>> 
>>>>>>>> That's essentially what the original tscstable.patch did, though
>>>>>>>> I was perhaps much uglier in the miscellaneous parts.
>>>>>>>> 
>>>>>>>> Thanks,
>>>>>>>> Dan
>>>>>>>> 
>>>>>>> 
>>>>>>> 
>>>> 
>>> 
>>> 
>>> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: Re: [PATCH] clocksource=tsc
  2008-07-18 11:01                           ` Keir Fraser
@ 2008-07-18 11:10                             ` Keir Fraser
  2008-07-18 14:19                               ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-18 11:10 UTC (permalink / raw)
  To: Keir Fraser, dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

Ah, I changed my mind. I now think it's due to overflow of a 32-bit usecs
variable in Linux's do_gettimeofday(). Cause is still the same though -- the
guest assuming that the timestamp record from Xen will never be 'very' stale
(in this case, older than 1h12m).

 -- Keir

On 18/7/08 12:01, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> Okay, I'm pretty sure the reason your patch works is that system time as
> calculated in the guest is still being continually calibrated by
> local_time_calibration() in Xen. You do not turn off that calibration
> function and hence it will continually update the cpu_time structure with
> new timestamps and scale factors. Is this really intended?
> 
> The reason mine doesn't work is that the time record exposed to the guest
> *never* changes. The calculation in the guest is thus effectively:
>  sys_time = TSC * scale_factor
> 
> The problem is that this is sensitive to small errors in the scale factor!
> And unfortunately we compute a new scale factor, to convert to us, as
> scale_factor_ns/1000. The resulting scale factor is obviously less accurate,
> and leads to an accumulating absolute error as the TSC value gets large.
> 
> So, when all is said and done, what are we actually trying to achieve here
> and how should it really work? As far as I can see we cannot avoid sending
> an updated time record to guests periodically. But running
> local_time_calibration() only for guests and not for Xen system time seems
> pretty weird to me.
> 
> Perhaps we should push this all off until after 3.3?
> 
>  -- Keir
> 
> On 18/7/08 08:24, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
>> You shouldn't have to modify get_s_time(). Guests will still be running the
>> old algorithm (sys_stamp + (now_tsc - stamp_tsc) * scale_tsc), so existing
>> get_s_time() ought to work. In fact it must work.
>> 
>>  -- Keir
>> 
>> On 18/7/08 00:05, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:
>> 
>>> After trying lots of things, I fell back to my known
>>> working version of time.c and adapted it forward to
>>> match tip.  The attached patch (on top of 18063) boots
>>> clocksource=tsc and doesn't display the weirdness for
>>> 2-1/2 hours of testing (always showed up around 1 hour before).
>>> 
>>> It may not be elegant but it works and tip doesn't (with
>>> clocksource=tsc).
>>> 
>>> Dan
>>> 
>>>> -----Original Message-----
>>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>>> Sent: Wednesday, July 16, 2008 6:50 AM
>>>> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
>>>> Cc: Dave Winchell
>>>> Subject: Re: [PATCH] clocksource=tsc
>>>> 
>>>> 
>>>> That's a weird set of symptoms. Perhaps dom0's sense of system time is
>>>> diverging from Xen's? I don't see that CPUs can diverge, if
>>>> their TSCs are
>>>> in sync, since we shouldn't be dynamically modifying the
>>>> per-CPU timestamps
>>>> or scale factors.
>>>> 
>>>>  -- Keir
>>>> 
>>>> On 16/7/08 13:43, "Dan Magenheimer"
>>>> <dan.magenheimer@oracle.com> wrote:
>>>> 
>>>>> Well now I have to take that back.  It DOESN'T work yet.
>>>>> I think I am experiencing "Weirdness can happen..."
>>>>> when booting with clocksource=tsc... I was away from
>>>>> the machine overnight but the symptoms I've seen before
>>>>> are that the system becomes less snappy and eventually
>>>>> grinds to a near-halt.
>>>>> 
>>>>> Oddly, I can login most of the way on the console
>>>>> and launch new xterm's in my VNC display, but I never
>>>>> get a prompt, and I can't interrupt a process I left
>>>>> running overnight in another xterm.  The time display
>>>>> in gnome seems to have frozen about an hour after
>>>>> I booted.  Pinging the machine works but ssh'ing to
>>>>> it doesn't ("Connection closed")
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>> Sent: Tuesday, July 15, 2008 10:12 PM
>>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser';
>>>> 'Xen-Devel (E-mail)'
>>>>>> Cc: 'Dave Winchell'
>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>> 
>>>>>> 
>>>>>> OK, ignore that.  It looks like you have it all fixed
>>>>>> in 18060.  I tried it and it now boots.  Thanks!
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>>> Sent: Tuesday, July 15, 2008 7:16 PM
>>>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
>>>>>> (E-mail)'
>>>>>>> Cc: 'Dave Winchell'
>>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>>> 
>>>>>>> 
>>>>>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>>>>>> read_counter when
>>>>>>>>>> clocksource=tsc would be another possibility...
>>>>>>> 
>>>>>>> Well I hacked on 18055 for awhile and just couldn't get it
>>>>>>> to boot.  I think local_time_calibration() (and thus
>>>>>>> init_percpu_time()) is necessary for boot, though I'm not really
>>>>>>> sure why.  Possibly the "Weirdness can happen..." comment in
>>>>>>> that routine?
>>>>>>> 
>>>>>>> Anyway, this patch (on top of 18055) DOES work, returns to the
>>>>>>> 32-bit read_counter, and re-enables local_time_calibration().
>>>>>>> I'd suggest putting off more major surgery for another day.
>>>>>>> 
>>>>>>> Thanks,
>>>>>>> Dan
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>>>> Sent: Tuesday, July 15, 2008 10:04 AM
>>>>>>>> To: dan.magenheimer@oracle.com; Keir Fraser; Xen-Devel (E-mail)
>>>>>>>> Cc: Dave Winchell
>>>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>>>> 
>>>>>>>> 
>>>>>>>> Hmmm... 18055 also fails to boot on my machine.
>>>>>>>> 
>>>>>>>> Could we perhaps fall back to my original patch and do
>>>>>>>> cleanup later/separately?  I also want to try implementing
>>>>>>>> an hpet64-based get_s_time() so will be working more
>>>>>>>> in this code later... but want to get clocksource=tsc
>>>>>>>> working now with minimal code impact given the freeze.
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
>>>>>>>>> Sent: Tuesday, July 15, 2008 9:46 AM
>>>>>>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
>>>>>>>>> Cc: 'Dave Winchell'
>>>>>>>>> Subject: RE: [PATCH] clocksource=tsc
>>>>>>>>> 
>>>>>>>>>> Actually in this mode of operation we hardly need a platform
>>>>>>>>>> timer *at all*.
>>>>>>>>>> The idea is that we let the TSCs free-run, because we know
>>>>>>>>>> they will behave.
>>>>>>>>>> Returning to 32-bit read_counter(), and having NULL
>>>>>>>>> read_counter when
>>>>>>>>>> clocksource=tsc would be another possibility...
>>>>>>>>> 
>>>>>>>>> That's essentially what the original tscstable.patch did, though
>>>>>>>>> I was perhaps much uglier in the miscellaneous parts.
>>>>>>>>> 
>>>>>>>>> Thanks,
>>>>>>>>> Dan
>>>>>>>>> 
>>>>>>>> 
>>>>>>>> 
>>>>> 
>>>> 
>>>> 
>>>> 
>> 
>> 
>> 
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xensource.com
>> http://lists.xensource.com/xen-devel
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: Re: [PATCH] clocksource=tsc
  2008-07-18 11:10                             ` Keir Fraser
@ 2008-07-18 14:19                               ` Dan Magenheimer
  2008-07-18 14:29                                 ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-18 14:19 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

OK, I see.  So:

If we have a 64-bit precise, monotonic, relatively constant rate
(i.e. "ideal") timesource, Xen system time should use it directly.
Call this the "new" algorithm.  If we do NOT have such a timesource,
Xen system time MUST use the (sys_stamp+(now_tsc-stamp_tsc)*scale_tsc)
method... the "old" algorithm.

Unfortunately, the old algorithm is hard-baked into PV domains.
But, to maximize precision in HV domains, we would like to use
the new algorithm whenever there is an ideal timesource.  In
fact, there's no reason why Xen itself shouldn't use an
ideal timesource if available.

So perhaps the compromise which I've (admittedly accidentally)
crafted is the right answer for now.  And the right answer for
later is to update the PV time mechanisms (in a backwards
compatible way of course) to determine if an ideal timesource
is available and use it if it is.

> -----Original Message-----
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Friday, July 18, 2008 5:10 AM
> To: Keir Fraser; dan.magenheimer@oracle.com; Xen-Devel (E-mail)
> Cc: Dave Winchell
> Subject: Re: [Xen-devel] Re: [PATCH] clocksource=tsc
> 
> 
> Ah, I changed my mind. I now think it's due to overflow of a 
> 32-bit usecs
> variable in Linux's do_gettimeofday(). Cause is still the 
> same though -- the
> guest assuming that the timestamp record from Xen will never 
> be 'very' stale
> (in this case, older than 1h12m).
> 
>  -- Keir
> 
> On 18/7/08 12:01, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
> > Okay, I'm pretty sure the reason your patch works is that 
> system time as
> > calculated in the guest is still being continually calibrated by
> > local_time_calibration() in Xen. You do not turn off that 
> calibration
> > function and hence it will continually update the cpu_time 
> structure with
> > new timestamps and scale factors. Is this really intended?
> >
> > The reason mine doesn't work is that the time record 
> exposed to the guest
> > *never* changes. The calculation in the guest is thus effectively:
> >  sys_time = TSC * scale_factor
> >
> > The problem is that this is sensitive to small errors in 
> the scale factor!
> > And unfortunately we compute a new scale factor, to convert 
> to us, as
> > scale_factor_ns/1000. The resulting scale factor is 
> obviously less accurate,
> > and leads to an accumulating absolute error as the TSC 
> value gets large.
> >
> > So, when all is said and done, what are we actually trying 
> to achieve here
> > and how should it really work? As far as I can see we 
> cannot avoid sending
> > an updated time record to guests periodically. But running
> > local_time_calibration() only for guests and not for Xen 
> system time seems
> > pretty weird to me.
> >
> > Perhaps we should push this all off until after 3.3?
> >
> >  -- Keir
> >
> > On 18/7/08 08:24, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> >
> >> You shouldn't have to modify get_s_time(). Guests will 
> still be running the
> >> old algorithm (sys_stamp + (now_tsc - stamp_tsc) * 
> scale_tsc), so existing
> >> get_s_time() ought to work. In fact it must work.
> >>
> >>  -- Keir
> >>
> >> On 18/7/08 00:05, "Dan Magenheimer" 
> <dan.magenheimer@oracle.com> wrote:
> >>
> >>> After trying lots of things, I fell back to my known
> >>> working version of time.c and adapted it forward to
> >>> match tip.  The attached patch (on top of 18063) boots
> >>> clocksource=tsc and doesn't display the weirdness for
> >>> 2-1/2 hours of testing (always showed up around 1 hour before).
> >>>
> >>> It may not be elegant but it works and tip doesn't (with
> >>> clocksource=tsc).
> >>>
> >>> Dan
> >>>
> >>>> -----Original Message-----
> >>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> >>>> Sent: Wednesday, July 16, 2008 6:50 AM
> >>>> To: dan.magenheimer@oracle.com; Xen-Devel (E-mail)
> >>>> Cc: Dave Winchell
> >>>> Subject: Re: [PATCH] clocksource=tsc
> >>>>
> >>>>
> >>>> That's a weird set of symptoms. Perhaps dom0's sense of 
> system time is
> >>>> diverging from Xen's? I don't see that CPUs can diverge, if
> >>>> their TSCs are
> >>>> in sync, since we shouldn't be dynamically modifying the
> >>>> per-CPU timestamps
> >>>> or scale factors.
> >>>>
> >>>>  -- Keir
> >>>>
> >>>> On 16/7/08 13:43, "Dan Magenheimer"
> >>>> <dan.magenheimer@oracle.com> wrote:
> >>>>
> >>>>> Well now I have to take that back.  It DOESN'T work yet.
> >>>>> I think I am experiencing "Weirdness can happen..."
> >>>>> when booting with clocksource=tsc... I was away from
> >>>>> the machine overnight but the symptoms I've seen before
> >>>>> are that the system becomes less snappy and eventually
> >>>>> grinds to a near-halt.
> >>>>>
> >>>>> Oddly, I can login most of the way on the console
> >>>>> and launch new xterm's in my VNC display, but I never
> >>>>> get a prompt, and I can't interrupt a process I left
> >>>>> running overnight in another xterm.  The time display
> >>>>> in gnome seems to have frozen about an hour after
> >>>>> I booted.  Pinging the machine works but ssh'ing to
> >>>>> it doesn't ("Connection closed")
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>>>> Sent: Tuesday, July 15, 2008 10:12 PM
> >>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser';
> >>>> 'Xen-Devel (E-mail)'
> >>>>>> Cc: 'Dave Winchell'
> >>>>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>>>
> >>>>>>
> >>>>>> OK, ignore that.  It looks like you have it all fixed
> >>>>>> in 18060.  I tried it and it now boots.  Thanks!
> >>>>>>
> >>>>>>> -----Original Message-----
> >>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>>>>> Sent: Tuesday, July 15, 2008 7:16 PM
> >>>>>>> To: 'dan.magenheimer@oracle.com'; 'Keir Fraser'; 'Xen-Devel
> >>>>>> (E-mail)'
> >>>>>>> Cc: 'Dave Winchell'
> >>>>>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>>>>
> >>>>>>>
> >>>>>>>>>> Returning to 32-bit read_counter(), and having NULL
> >>>>>>>>> read_counter when
> >>>>>>>>>> clocksource=tsc would be another possibility...
> >>>>>>>
> >>>>>>> Well I hacked on 18055 for awhile and just couldn't get it
> >>>>>>> to boot.  I think local_time_calibration() (and thus
> >>>>>>> init_percpu_time()) is necessary for boot, though I'm 
> not really
> >>>>>>> sure why.  Possibly the "Weirdness can happen..." comment in
> >>>>>>> that routine?
> >>>>>>>
> >>>>>>> Anyway, this patch (on top of 18055) DOES work, returns to the
> >>>>>>> 32-bit read_counter, and re-enables local_time_calibration().
> >>>>>>> I'd suggest putting off more major surgery for another day.
> >>>>>>>
> >>>>>>> Thanks,
> >>>>>>> Dan
> >>>>>>>
> >>>>>>>> -----Original Message-----
> >>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>>>>>> Sent: Tuesday, July 15, 2008 10:04 AM
> >>>>>>>> To: dan.magenheimer@oracle.com; Keir Fraser; 
> Xen-Devel (E-mail)
> >>>>>>>> Cc: Dave Winchell
> >>>>>>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> Hmmm... 18055 also fails to boot on my machine.
> >>>>>>>>
> >>>>>>>> Could we perhaps fall back to my original patch and do
> >>>>>>>> cleanup later/separately?  I also want to try implementing
> >>>>>>>> an hpet64-based get_s_time() so will be working more
> >>>>>>>> in this code later... but want to get clocksource=tsc
> >>>>>>>> working now with minimal code impact given the freeze.
> >>>>>>>>
> >>>>>>>>> -----Original Message-----
> >>>>>>>>> From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com]
> >>>>>>>>> Sent: Tuesday, July 15, 2008 9:46 AM
> >>>>>>>>> To: 'Keir Fraser'; 'Xen-Devel (E-mail)'
> >>>>>>>>> Cc: 'Dave Winchell'
> >>>>>>>>> Subject: RE: [PATCH] clocksource=tsc
> >>>>>>>>>
> >>>>>>>>>> Actually in this mode of operation we hardly need 
> a platform
> >>>>>>>>>> timer *at all*.
> >>>>>>>>>> The idea is that we let the TSCs free-run, because we know
> >>>>>>>>>> they will behave.
> >>>>>>>>>> Returning to 32-bit read_counter(), and having NULL
> >>>>>>>>> read_counter when
> >>>>>>>>>> clocksource=tsc would be another possibility...
> >>>>>>>>>
> >>>>>>>>> That's essentially what the original 
> tscstable.patch did, though
> >>>>>>>>> I was perhaps much uglier in the miscellaneous parts.
> >>>>>>>>>
> >>>>>>>>> Thanks,
> >>>>>>>>> Dan
> >>>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>
> >>>>
> >>>>
> >>>>
> >>
> >>
> >>
> >> _______________________________________________
> >> Xen-devel mailing list
> >> Xen-devel@lists.xensource.com
> >> http://lists.xensource.com/xen-devel
> >
> >
> 
> 
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: Re: [PATCH] clocksource=tsc
  2008-07-18 14:19                               ` Dan Magenheimer
@ 2008-07-18 14:29                                 ` Keir Fraser
  2008-07-18 14:56                                   ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-18 14:29 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

On 18/7/08 15:19, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> So perhaps the compromise which I've (admittedly accidentally)
> crafted is the right answer for now.  And the right answer for
> later is to update the PV time mechanisms (in a backwards
> compatible way of course) to determine if an ideal timesource
> is available and use it if it is.

Well, maybe, although I don't see we have any guarantee that 'platform
system time' and Xen's get_s_time won't diverge in absolute terms as time
passes. The scale factors each use are calculated somewhat independently.
Actually I think it may get it right just now, but it looks rather fragile
and it stores up trouble for systems with long uptimes if you get it wrong.
There's no particular reason not to generate fresh time records every few
seconds and use those both in Xen and in guests, using the existing
compute-system-time algorithm.

 -- Keir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: Re: [PATCH] clocksource=tsc
  2008-07-18 14:29                                 ` Keir Fraser
@ 2008-07-18 14:56                                   ` Dan Magenheimer
  2008-07-18 15:00                                     ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-18 14:56 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

> > So perhaps the compromise which I've (admittedly accidentally)
> > crafted is the right answer for now.  And the right answer for
> > later is to update the PV time mechanisms (in a backwards
> > compatible way of course) to determine if an ideal timesource
> > is available and use it if it is.
> 
> Well, maybe, although I don't see we have any guarantee that 'platform
> system time' and Xen's get_s_time won't diverge in absolute 
> terms as time
> passes. The scale factors each use are calculated somewhat 
> independently.
> Actually I think it may get it right just now, but it looks 
> rather fragile
> and it stores up trouble for systems with long uptimes if you 
> get it wrong.
> There's no particular reason not to generate fresh time 
> records every few
> seconds and use those both in Xen and in guests, using the existing
> compute-system-time algorithm.

It appears that, for clocksource=tsc, as long as both
read_platform_stime() and get_s_time() are returning
scaled-tsc there can be no divergence.

The issue with generating fresh time records every
few seconds is that it unnecessarily introduces jitter
into an otherwise ideal timesource.

Dan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: Re: [PATCH] clocksource=tsc
  2008-07-18 14:56                                   ` Dan Magenheimer
@ 2008-07-18 15:00                                     ` Keir Fraser
  2008-07-18 16:51                                       ` Dan Magenheimer
  0 siblings, 1 reply; 28+ messages in thread
From: Keir Fraser @ 2008-07-18 15:00 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

On 18/7/08 15:56, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> It appears that, for clocksource=tsc, as long as both
> read_platform_stime() and get_s_time() are returning
> scaled-tsc there can be no divergence.
> 
> The issue with generating fresh time records every
> few seconds is that it unnecessarily introduces jitter
> into an otherwise ideal timesource.

I don't think this is necessarily true. If we write code to generate
accurate time records specifically for clocksource=tsc then we should easily
get accuracy down to a couple of parts per billion. This is certainly a more
pragmatic solution than extending the guest time interfaces. I am at least
coming round to the fact that the changes required in Xen's time.c are going
to have to be a bit more drastic than I first hoped.

 -- Keir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: Re: [PATCH] clocksource=tsc
  2008-07-18 15:00                                     ` Keir Fraser
@ 2008-07-18 16:51                                       ` Dan Magenheimer
  2008-07-18 19:28                                         ` Keir Fraser
  0 siblings, 1 reply; 28+ messages in thread
From: Dan Magenheimer @ 2008-07-18 16:51 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail); +Cc: Dave Winchell

> I don't think this is necessarily true. If we write code to generate
> accurate time records specifically for clocksource=tsc then 
> we should easily
> get accuracy down to a couple of parts per billion. This is 
> certainly a more
> pragmatic solution than extending the guest time interfaces. 
> I am at least
> coming round to the fact that the changes required in Xen's 
> time.c are going
> to have to be a bit more drastic than I first hoped.
> 
>  -- Keir

I guess you are correct as long as "generate a time record"
doesn't include recomputing the scaling factor every
second (which is I think what introduces the jitter... see
more on this below).

However, I'm not sure why you perceive the aesthetics to
be so bad to put "if (ideal_clocksource)" in get_s_time()
and a few other places in time.c... or for that matter in
the PV guest code.

Your existing algorithm is a very cool and elegant way to
optimize time handling for all "legacy" time sources.
The more I understand about it, the more I am impressed!
But it is very difficult to understand and for most "modern"
hardware it unnecessarily obfuscates time handling.  So IMHO
I much prefer the "if (ideal_clocksource)... else ..."
approach rather than shoehorning the ideal to look like
the legacy.  And that applies for the PV guest code as well.

On a related note:

> The problem is that this is sensitive to small errors in the 
> scale factor!
> And unfortunately we compute a new scale factor, to convert to us, as
> scale_factor_ns/1000. The resulting scale factor is obviously 
> less accurate,
> and leads to an accumulating absolute error as the TSC value 
> gets large.

Yes I noticed that the mul_frac+scale_delta code gives different
answers than muldiv64, though they have essentially the same API.
I didn't do much testing but they differed after about 5 digits
and muldiv64 does a hardware divide so I suspect it is more
accurate (though also probably slower).  It might be worth looking
at this to squeeze out more precision.

Thanks for all your time and attention on this.  As Xen supports
more time-sensitive workloads (such as databases on enterprise
systems and games on PCs), I think it will pay off!

Dan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: Re: [PATCH] clocksource=tsc
  2008-07-18 16:51                                       ` Dan Magenheimer
@ 2008-07-18 19:28                                         ` Keir Fraser
  0 siblings, 0 replies; 28+ messages in thread
From: Keir Fraser @ 2008-07-18 19:28 UTC (permalink / raw)
  To: dan.magenheimer@oracle.com, Xen-Devel (E-mail); +Cc: Dave Winchell

On 18/7/08 17:51, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> I guess you are correct as long as "generate a time record"
> doesn't include recomputing the scaling factor every
> second (which is I think what introduces the jitter... see
> more on this below).
> 
> However, I'm not sure why you perceive the aesthetics to
> be so bad to put "if (ideal_clocksource)" in get_s_time()
> and a few other places in time.c... or for that matter in
> the PV guest code.

Extra special case are ugly. And then if they extend into the guests then
you also have to worry about new guests on old Xen and old guests on new Xen
(i.e., compatibility both ways). Which is not a good way to go when there is
an easy way to hide this new implementation from guests. Indeed we would
only update the TSC and system-time stamps -- the scale factor would never
change.

 -- Keir

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2008-07-18 19:28 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-12 21:38 [PATCH] clocksource=tsc Dan Magenheimer
2008-07-13  3:59 ` Dan Magenheimer
2008-07-14  9:24 ` Keir Fraser
2008-07-14 17:59   ` Dan Magenheimer
2008-07-15  0:35     ` Tian, Kevin
2008-07-17 23:47       ` Dan Magenheimer
2008-07-15 13:05     ` Keir Fraser
2008-07-15 14:44       ` Dan Magenheimer
2008-07-15 15:08         ` Keir Fraser
2008-07-15 15:46           ` Dan Magenheimer
2008-07-15 16:04             ` Dan Magenheimer
2008-07-16  1:15               ` Dan Magenheimer
2008-07-16  4:11                 ` Dan Magenheimer
2008-07-16 12:43                   ` Dan Magenheimer
2008-07-16 12:49                     ` Keir Fraser
2008-07-16 13:43                       ` Dan Magenheimer
2008-07-16 15:42                         ` Dan Magenheimer
2008-07-16 19:32                           ` Keir Fraser
2008-07-17 23:05                       ` Dan Magenheimer
2008-07-18  7:24                         ` Keir Fraser
2008-07-18 11:01                           ` Keir Fraser
2008-07-18 11:10                             ` Keir Fraser
2008-07-18 14:19                               ` Dan Magenheimer
2008-07-18 14:29                                 ` Keir Fraser
2008-07-18 14:56                                   ` Dan Magenheimer
2008-07-18 15:00                                     ` Keir Fraser
2008-07-18 16:51                                       ` Dan Magenheimer
2008-07-18 19:28                                         ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.