All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Bryan O'Sullivan" <bos@serpentine.com>
To: Andi Kleen <ak@suse.de>
Cc: vojtech@suse.cz, discuss@x86-64.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] New x86_64 time code for 2.5.70
Date: 11 Jun 2003 13:10:49 -0700	[thread overview]
Message-ID: <1055362249.17154.86.camel@serpentine.internal.keyresearch.com> (raw)
In-Reply-To: <1055361411.17154.83.camel@serpentine.internal.keyresearch.com>

[-- Attachment #1: Type: text/plain, Size: 787 bytes --]

On Wed, 2003-06-11 at 12:56, Bryan O'Sullivan wrote:

> I'll send the plain diff -u instead next time.

Here's a new version of the patch, with problems fixed, and whitespace
fixes included.  Boots just fine.

	<b

 arch/x86_64/Kconfig              |   12 +
 arch/x86_64/kernel/acpi/boot.c   |    6
 arch/x86_64/kernel/apic.c        |   84 +++++-----
 arch/x86_64/kernel/smpboot.c     |   11 +
 arch/x86_64/kernel/time.c        |  305 +++++++++++++++++++++++++++++++-------- arch/x86_64/kernel/vsyscall.c    |   28 ++-
 arch/x86_64/vmlinux.lds.S        |    6
 include/asm-x86_64/fixmap.h      |    2
 include/asm-x86_64/mc146818rtc.h |    5
 include/asm-x86_64/timex.h       |   30 +++
 include/asm-x86_64/vsyscall.h    |   18 +-
 11 files changed, 383 insertions(+), 124 deletions(-)


[-- Attachment #2: Type: text/plain, Size: 29323 bytes --]

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.1436  -> 1.1438 
#	arch/x86_64/vmlinux.lds.S	1.16    -> 1.17   
#	arch/x86_64/kernel/apic.c	1.20    -> 1.21   
#	include/asm-x86_64/fixmap.h	1.3     -> 1.4    
#	arch/x86_64/kernel/acpi/boot.c	1.1     -> 1.2    
#	include/asm-x86_64/mc146818rtc.h	1.1     -> 1.2    
#	include/asm-x86_64/timex.h	1.6     -> 1.7    
#	arch/x86_64/kernel/time.c	1.15    -> 1.17   
#	arch/x86_64/kernel/vsyscall.c	1.10    -> 1.11   
#	 arch/x86_64/Kconfig	1.22    -> 1.23   
#	include/asm-x86_64/vsyscall.h	1.5     -> 1.6    
#	arch/x86_64/kernel/smpboot.c	1.19    -> 1.20   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/06/11	bos@serpentine.internal.keyresearch.com	1.1437
# Forward port 2.4 time code.  Optionally uses HPET instead of PIT/RTC for
# gettimeofday calculations.  Far more stable than the current 2.5 code.
# 
# Current caveat: this code doesn't track lost interrupts properly, so we
# can very slowly lose a jiffy here or there.
# --------------------------------------------
# 03/06/11	bos@serpentine.internal.keyresearch.com	1.1438
# Fix residual bogons.
# --------------------------------------------
#
diff -Nru a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
--- a/arch/x86_64/Kconfig	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/Kconfig	Wed Jun 11 13:08:09 2003
@@ -52,6 +52,18 @@
 	  klogd/syslogd or the X server. You should normally N here, unless
 	  you want to debug such a crash.
 	  
+config HPET_TIMER
+	bool
+	default y
+	help
+	  Use the IA-PC HPET (High Precision Event Timer) to manage
+	  time in preference to the PIT and RTC, if a HPET is
+	  present.  The HPET provides a stable time base on SMP
+	  systems, unlike the RTC, but it is more expensive to access,
+	  as it is off-chip.  You can find the HPET spec at
+	  <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
+
+	  If unsure, say Y.
 
 config GENERIC_ISA_DMA
 	bool
diff -Nru a/arch/x86_64/kernel/acpi/boot.c b/arch/x86_64/kernel/acpi/boot.c
--- a/arch/x86_64/kernel/acpi/boot.c	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/kernel/acpi/boot.c	Wed Jun 11 13:08:09 2003
@@ -244,9 +244,11 @@
 		return -1;
 	}
 
-	hpet.address = hpet_tbl->addr.addrl | ((long) hpet_tbl->addr.addrh << 32);
+	vxtime.hpet_address = hpet_tbl->addr.addrl |
+		((long) hpet_tbl->addr.addrh << 32);
 
-	printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet.address);
+	printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n",
+	       hpet_tbl->id, vxtime.hpet_address);
 
 	return 0;
 } 
diff -Nru a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
--- a/arch/x86_64/kernel/apic.c	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/kernel/apic.c	Wed Jun 11 13:08:09 2003
@@ -46,7 +46,7 @@
 void enable_NMI_through_LVT0 (void * dummy)
 {
 	unsigned int v, ver;
-	
+
 	ver = apic_read(APIC_LVR);
 	ver = GET_APIC_VERSION(ver);
 	v = APIC_DM_NMI;                        /* unmask and set to NMI */
@@ -297,7 +297,7 @@
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
 	 */
-	if (!clustered_apic_mode && 
+	if (!clustered_apic_mode &&
 	    !test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
 		BUG();
 
@@ -309,7 +309,7 @@
 
 	if (!clustered_apic_mode) {
 		/*
-		 * In clustered apic mode, the firmware does this for us 
+		 * In clustered apic mode, the firmware does this for us
 		 * Put the APIC into flat delivery mode.
 		 * Must be "all ones" explicitly for 82489DX.
 		 */
@@ -422,15 +422,15 @@
 		value = apic_read(APIC_ESR);
 		Dprintk("ESR value after enabling vector: %08x\n", value);
 	} else {
-		if (esr_disable)	
-			/* 
-			 * Something untraceble is creating bad interrupts on 
+		if (esr_disable)
+			/*
+			 * Something untraceble is creating bad interrupts on
 			 * secondary quads ... for the moment, just leave the
 			 * ESR disabled - we can't do anything useful with the
 			 * errors anyway - mbligh
 			 */
 			printk("Leaving ESR disabled.\n");
-		else 
+		else
 			printk("No ESR for 82489DX.\n");
 	}
 
@@ -580,7 +580,7 @@
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
  * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.) 
+ * not correctly set up (usually the APIC timer won't work etc.)
  */
 
 static int __init detect_init_APIC (void)
@@ -683,19 +683,25 @@
 
 	local_irq_save(flags);
 
-	/* For some reasons this doesn't work on Simics, so fake it for now */ 
-	if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { 
+	/* For some reasons this doesn't work on Simics, so fake it for now */
+	if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
 	__setup_APIC_LVTT(clocks);
 		return;
-	} 
+	}
 
 	/* wait for irq slice */
-	{
+	if (vxtime.hpet_address) {
+		int trigger = hpet_readl(HPET_T0_CMP);
+		while (hpet_readl(HPET_COUNTER) >= trigger)
+			/* do nothing */ ;
+		while (hpet_readl(HPET_COUNTER) <  trigger)
+			/* do nothing */ ;
+	} else {
 		int c1, c2;
 		outb_p(0x00, 0x43);
 		c2 = inb_p(0x40);
 		c2 |= inb_p(0x40) << 8;
-	do {
+		do {
 			c1 = c2;
 			outb_p(0x00, 0x43);
 			c2 = inb_p(0x40);
@@ -754,10 +760,10 @@
 
 void __init setup_boot_APIC_clock (void)
 {
-	if (disable_apic_timer) { 
-		printk(KERN_INFO "Disabling APIC timer\n"); 
-		return; 
-	} 
+	if (disable_apic_timer) {
+		printk(KERN_INFO "Disabling APIC timer\n");
+		return;
+	}
 
 	printk(KERN_INFO "Using local APIC timer interrupts.\n");
 	using_apic_timer = 1;
@@ -816,7 +822,7 @@
 	if ( (!multiplier) || (calibration_result/multiplier < 500))
 		return -EINVAL;
 
-	/* 
+	/*
 	 * Set the new multiplier for each CPU. CPUs don't start using the
 	 * new values until the next timer interrupt in which they do process
 	 * accounting. At that time they also adjust their APIC timers
@@ -856,7 +862,7 @@
 		 * Interrupts are already masked off at this point.
 		 */
 		per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
-		if (per_cpu(prof_counter, cpu) != 
+		if (per_cpu(prof_counter, cpu) !=
 		    per_cpu(prof_old_multiplier, cpu)) {
 			__setup_APIC_LVTT(calibration_result/
 					per_cpu(prof_counter, cpu));
@@ -928,19 +934,19 @@
 		ack_APIC_irq();
 
 #if 0
-	static unsigned long last_warning; 
-	static unsigned long skipped; 
+	static unsigned long last_warning;
+	static unsigned long skipped;
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	if (time_before(last_warning+30*HZ,jiffies)) { 
+	if (time_before(last_warning+30*HZ,jiffies)) {
 		printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
 		       smp_processor_id(), skipped);
-		last_warning = jiffies; 
+		last_warning = jiffies;
 		skipped = 0;
-	} else { 
-		skipped++; 
-	} 
-#endif 
+	} else {
+		skipped++;
+	}
+#endif
 	irq_exit();
 }
 
@@ -975,7 +981,7 @@
 	irq_exit();
 }
 
-int disable_apic; 
+int disable_apic;
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
@@ -983,11 +989,11 @@
  */
 int __init APIC_init_uniprocessor (void)
 {
-	if (disable_apic) { 
+	if (disable_apic) {
 		printk(KERN_INFO "Apic disabled\n");
-		return -1; 
+		return -1;
 	}
-	if (!cpu_has_apic) { 
+	if (!cpu_has_apic) {
 		disable_apic = 1;
 		printk(KERN_INFO "Apic disabled by BIOS\n");
 		return -1;
@@ -1015,18 +1021,18 @@
 	return 0;
 }
 
-static __init int setup_disableapic(char *str) 
-{ 
+static __init int setup_disableapic(char *str)
+{
 	disable_apic = 1;
 	return 0;
-} 
+}
 
-static __init int setup_noapictimer(char *str) 
-{ 
+static __init int setup_noapictimer(char *str)
+{
 	disable_apic_timer = 1;
 	return 0;
-} 
+}
 
-__setup("disableapic", setup_disableapic); 
-__setup("noapictimer", setup_noapictimer); 
+__setup("disableapic", setup_disableapic);
+__setup("noapictimer", setup_noapictimer);
 
diff -Nru a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
--- a/arch/x86_64/kernel/smpboot.c	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/kernel/smpboot.c	Wed Jun 11 13:08:09 2003
@@ -67,6 +67,8 @@
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
+extern void time_init_smp(void);
+
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -760,7 +762,7 @@
 		if (APIC_init_uniprocessor())
 			printk(KERN_NOTICE "Local APIC not detected."
 					   " Using dummy APIC emulation.\n");
-		return;
+		goto smp_done;
 	}
 
 	/*
@@ -784,7 +786,7 @@
 		cpu_online_map = phys_cpu_present_map = 1;
 		phys_cpu_present_map = 1;
 		disable_apic = 1;
-		return;
+		goto smp_done;
 	}
 
 	verify_local_APIC();
@@ -799,7 +801,7 @@
 		cpu_online_map = phys_cpu_present_map = 1;
 		phys_cpu_present_map = 1;
 		disable_apic = 1;
-		return;
+		goto smp_done;
 	}
 
 	connect_bsp_APIC();
@@ -883,6 +885,9 @@
 	 */
 	if (cpu_has_tsc && cpucount)
 		synchronize_tsc_bp();
+
+ smp_done:
+	time_init_smp();
 }
 
 /* These are wrappers to interface to the new boot process.  Someone
diff -Nru a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
--- a/arch/x86_64/kernel/time.c	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/kernel/time.c	Wed Jun 11 13:08:09 2003
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/bcd.h>
+#include <asm/pgtable.h>
 #include <asm/vsyscall.h>
 #include <asm/timex.h>
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -35,41 +36,61 @@
 extern int using_apic_timer;
 
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
 
 extern int using_apic_timer;
 extern void smp_local_timer_interrupt(struct pt_regs * regs);
 
+#undef HPET_HACK_ENABLE_DANGEROUS
 
-unsigned int cpu_khz;					/* TSC clocks / usec, not used here */
-unsigned long hpet_period;				/* fsecs / HPET clock */
-unsigned long hpet_tick;				/* HPET clocks / interrupt */
-int hpet_report_lost_ticks;				/* command line option */
 
-struct hpet_data __hpet __section_hpet;			/* address, quotient, trigger, hz */
+unsigned int cpu_khz;				/* TSC clocks / usec, not used here */
+unsigned long hpet_period;			/* fsecs / HPET clock */
+unsigned long hpet_tick;			/* HPET clocks / interrupt */
+unsigned long vxtime_hz = 1193182;
+int report_lost_ticks;				/* command line option */
+
+struct vxtime_data __vxtime __section_vxtime;	/* for vsyscalls */
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
 struct timespec __xtime __section_xtime;
 struct timezone __sys_tz __section_sys_tz;
 
+static inline void rdtscll_sync(unsigned long *tsc)
+{
+#ifdef CONFIG_SMP
+	sync_core();
+#endif
+	rdtscll(*tsc);
+}
+
 /*
  * do_gettimeoffset() returns microseconds since last timer interrupt was
  * triggered by hardware. A memory read of HPET is slower than a register read
  * of TSC, but much more reliable. It's also synchronized to the timer
  * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
- * timer interrupt has happened already, but hpet.trigger wasn't updated yet.
+ * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
  * This is not a problem, because jiffies hasn't updated either. They are bound
  * together by xtime_lock.
-         */
+ */
 
-inline unsigned int do_gettimeoffset(void)
+static inline unsigned int do_gettimeoffset_tsc(void)
 {
 	unsigned long t;
-	sync_core();
-	rdtscll(t);	
-	return (t  - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset;
+	unsigned long x;
+	rdtscll_sync(&t);
+	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+	return x;
+}
+
+static inline unsigned int do_gettimeoffset_hpet(void)
+{
+	return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
 }
 
+unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+
 /*
  * This version of gettimeofday() has microsecond resolution and better than
  * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
@@ -87,7 +108,8 @@
 		sec = xtime.tv_sec;
 		usec = xtime.tv_nsec / 1000;
 
-		t = (jiffies - wall_jiffies) * (1000000L / HZ) + do_gettimeoffset();
+		t = (jiffies - wall_jiffies) * (1000000L / HZ) +
+			do_gettimeoffset();
 		usec += t;
 
 	} while (read_seqretry(&xtime_lock, seq));
@@ -169,17 +191,17 @@
 	real_seconds = nowtime % 60;
 	real_minutes = nowtime / 60;
 	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
-		real_minutes += 30;		/* correct for half hour time zone */
+		real_minutes += 30;	/* correct for half hour time zone */
 	real_minutes %= 60;
 
 	if (abs(real_minutes - cmos_minutes) < 30) {
-			BIN_TO_BCD(real_seconds);
-			BIN_TO_BCD(real_minutes);
+		BIN_TO_BCD(real_seconds);
+		BIN_TO_BCD(real_minutes);
 		CMOS_WRITE(real_seconds, RTC_SECONDS);
 		CMOS_WRITE(real_minutes, RTC_MINUTES);
 	} else
-		printk(KERN_WARNING "time.c: can't update CMOS clock from %d to %d\n",
-		       cmos_minutes, real_minutes);
+		printk(KERN_WARNING "time.c: can't update CMOS clock "
+		       "from %d to %d\n", cmos_minutes, real_minutes);
 
 /*
  * The following flags have to be released exactly in this order, otherwise the
@@ -198,27 +220,65 @@
 static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	static unsigned long rtc_update = 0;
+	unsigned long tsc, lost = 0;
+	int delay, offset = 0;
 
 /*
  * Here we are in the timer irq handler. We have irqs locally disabled (so we
  * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
  * on the other CPU, so we need a lock. We also need to lock the vsyscall
  * variables, because both do_timer() and us change them -arca+vojtech
-	 */
+ */
 
 	write_seqlock(&xtime_lock);
 
-	{
-		unsigned long t;
+	if (vxtime.hpet_address) {
+		offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+		delay = hpet_readl(HPET_COUNTER) - offset;
+	} else {
+		spin_lock(&i8253_lock);
+		outb_p(0x00, 0x43);
+		delay = inb_p(0x40);
+		delay |= inb(0x40) << 8;
+		spin_unlock(&i8253_lock);
+		delay = LATCH - 1 - delay;
+	}
+
+	rdtscll_sync(&tsc);
 
-		sync_core();
-		rdtscll(t);
-		hpet.offset = (t  - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset - 1000000L / HZ;
-		if (hpet.offset >= 1000000L / HZ)
-			hpet.offset = 0;
-		hpet.ticks = min_t(long, max_t(long, (t  - hpet.last_tsc) * (1000000L / HZ) / (1000000L / HZ - hpet.offset),
-				cpu_khz * 1000/HZ * 15 / 16), cpu_khz * 1000/HZ * 16 / 15); 
-		hpet.last_tsc = t;
+	if (vxtime.mode == VXTIME_HPET) {
+		if (offset - vxtime.last > hpet_tick) {
+			lost = (offset - vxtime.last) / hpet_tick - 1;
+		}
+
+		vxtime.last = offset;
+	} else {
+		offset = (((tsc - vxtime.last_tsc) *
+			   vxtime.tsc_quot) >> 32) - tick_usec;
+
+		if (offset > tick_usec) {
+			lost = offset / tick_usec;
+			offset %= tick_usec;
+		}
+
+		vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
+
+		if ((((tsc - vxtime.last_tsc) *
+		      vxtime.tsc_quot) >> 32) < offset)
+			vxtime.last_tsc = tsc -
+				(((long) offset << 32) / vxtime.tsc_quot) - 1;
+	}
+
+	if (lost) {
+		if (report_lost_ticks)
+			printk(KERN_WARNING "time.c: Lost %ld timer "
+			       "tick(s)! (rip %016lx)\n",
+			       (offset - vxtime.last) / hpet_tick - 1,
+			       regs->rip);
+		// XXX The accounting of lost ticks is way off right
+		// now. -bos
+
+		// jiffies += lost;
 	}
 
 /*
@@ -244,16 +304,16 @@
  * If we have an externally synchronized Linux clock, then update CMOS clock
  * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
  * closest to exactly 500 ms before the next second. If the update fails, we
- * don'tcare, as it'll be updated on the next turn, and the problem (time way
+ * don't care, as it'll be updated on the next turn, and the problem (time way
  * off) isn't likely to go away much sooner anyway.
  */
 
 	if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
-		abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
+	    abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
 		set_rtc_mmss(xtime.tv_sec);
 		rtc_update = xtime.tv_sec + 660;
 	}
- 
+
 	write_sequnlock(&xtime_lock);
 
 	return IRQ_HANDLED;
@@ -263,6 +323,7 @@
 {
 	unsigned int timeout, year, mon, day, hour, min, sec;
 	unsigned char last, this;
+	unsigned long flags;
 
 /*
  * The Linux interpretation of the CMOS clock register contents: When the
@@ -272,7 +333,7 @@
  * standard 8.3 MHz ISA bus.
  */
 
-	spin_lock(&rtc_lock); 
+	spin_lock_irqsave(&rtc_lock, flags);
 
 	timeout = 1000000;
 	last = this = 0;
@@ -286,28 +347,28 @@
 /*
  * Here we are safe to assume the registers won't change for a whole second, so
  * we just go ahead and read them.
-	 */
+ */
 
-		sec = CMOS_READ(RTC_SECONDS);
-		min = CMOS_READ(RTC_MINUTES);
-		hour = CMOS_READ(RTC_HOURS);
-		day = CMOS_READ(RTC_DAY_OF_MONTH);
-		mon = CMOS_READ(RTC_MONTH);
-		year = CMOS_READ(RTC_YEAR);
+	sec = CMOS_READ(RTC_SECONDS);
+	min = CMOS_READ(RTC_MINUTES);
+	hour = CMOS_READ(RTC_HOURS);
+	day = CMOS_READ(RTC_DAY_OF_MONTH);
+	mon = CMOS_READ(RTC_MONTH);
+	year = CMOS_READ(RTC_YEAR);
 
-	spin_unlock(&rtc_lock);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 /*
  * We know that x86-64 always uses BCD format, no need to check the config
  * register.
  */
 
-	    BCD_TO_BIN(sec);
-	    BCD_TO_BIN(min);
-	    BCD_TO_BIN(hour);
-	    BCD_TO_BIN(day);
-	    BCD_TO_BIN(mon);
-	    BCD_TO_BIN(year);
+	BCD_TO_BIN(sec);
+	BCD_TO_BIN(min);
+	BCD_TO_BIN(hour);
+	BCD_TO_BIN(day);
+	BCD_TO_BIN(mon);
+	BCD_TO_BIN(year);
 
 /*
  * This will work up to Dec 31, 2069.
@@ -326,6 +387,32 @@
 
 #define TICK_COUNT 100000000
 
+static unsigned int __init hpet_calibrate_tsc(void)
+{
+	int tsc_start, hpet_start;
+	int tsc_now, hpet_now;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	local_irq_disable();
+
+	hpet_start = hpet_readl(HPET_COUNTER);
+	rdtscl(tsc_start);
+
+	do {
+		local_irq_disable();
+		hpet_now = hpet_readl(HPET_COUNTER);
+		sync_core();
+		rdtscl(tsc_now);
+		local_irq_restore(flags);
+	} while ((tsc_now - tsc_start) < TICK_COUNT &&
+		 (hpet_now - hpet_start) < TICK_COUNT);
+
+	return (tsc_now - tsc_start) * 1000000000L
+		/ ((hpet_now - hpet_start) * hpet_period / 1000);
+}
+
+
 /*
  * pit_calibrate_tsc() uses the speaker output (channel 2) of
  * the PIT. This is better than using the timer interrupt output,
@@ -339,10 +426,9 @@
 	unsigned long start, end;
 	unsigned long flags;
 
-	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+	spin_lock_irqsave(&i8253_lock, flags);
 
-	local_irq_save(flags);
-	local_irq_disable();
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 
 	outb(0xb0, 0x43);
 	outb((1193182 / (1000 / 50)) & 0xff, 0x42);
@@ -353,42 +439,145 @@
 	sync_core();
 	rdtscll(end);
 
+	spin_unlock_irqrestore(&i8253_lock, flags);
 
-	local_irq_restore(flags);
-	
 	return (end - start) / 50;
 }
 
+static int hpet_init(void)
+{
+	unsigned int cfg, id;
+
+	if (!vxtime.hpet_address)
+		return -1;
+	set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+	id = hpet_readl(HPET_ID);
+
+	if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
+	    !(id & HPET_ID_LEGSUP))
+		return -1;
+
+	hpet_period = hpet_readl(HPET_PERIOD);
+	if (hpet_period < 100000 || hpet_period > 100000000)
+		return -1;
+
+	hpet_tick = (tick_nsec + hpet_period / 2) / hpet_period;
+
+/*
+ * Stop the timers and reset the main counter.
+ */
+
+	cfg = hpet_readl(HPET_CFG);
+	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+	hpet_writel(cfg, HPET_CFG);
+	hpet_writel(0, HPET_COUNTER);
+	hpet_writel(0, HPET_COUNTER + 4);
+
+/*
+ * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
+ * and period also hpet_tick.
+ */
+
+	hpet_writel(HPET_T0_ENABLE | HPET_T0_PERIODIC | HPET_T0_SETVAL |
+		    HPET_T0_32BIT, HPET_T0_CFG);
+	hpet_writel(hpet_tick, HPET_T0_CMP);
+	hpet_writel(hpet_tick, HPET_T0_CMP);
+
+/*
+ * Go!
+ */
+
+	cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+	hpet_writel(cfg, HPET_CFG);
+
+	return 0;
+}
+
 void __init pit_init(void)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8253_lock, flags);
 	outb_p(0x34, 0x43);		/* binary, mode 2, LSB/MSB, ch 0 */
 	outb_p(LATCH & 0xff, 0x40);	/* LSB */
 	outb_p(LATCH >> 8, 0x40);	/* MSB */
+	spin_unlock_irqrestore(&i8253_lock, flags);
 }
 
 int __init time_setup(char *str)
 {
-	hpet_report_lost_ticks = 1;
+	report_lost_ticks = 1;
 	return 1;
 }
 
-static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+static struct irqaction irq0 = {
+	timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL
+};
 
 extern void __init config_acpi_tables(void);
 
 void __init time_init(void)
 {
+	char *timename;
+
+#ifdef HPET_HACK_ENABLE_DANGEROUS
+        if (!vxtime.hpet_address) {
+		printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
+		       "manually!\n");
+                outl(0x800038a0, 0xcf8);
+                outl(0xff000001, 0xcfc);
+                outl(0x800038a0, 0xcf8);
+                hpet_address = inl(0xcfc) & 0xfffffffe;
+		printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
+		       "at %#lx.\n", hpet_address);
+        }
+#endif
+
 	xtime.tv_sec = get_cmos_time();
 	xtime.tv_nsec = 0;
 
-	pit_init();
-	printk(KERN_INFO "time.c: Using 1.1931816 MHz PIT timer.\n");
-	cpu_khz = pit_calibrate_tsc();
+	if (!hpet_init()) {
+                vxtime_hz = (1000000000000000L + hpet_period / 2) /
+			hpet_period;
+		cpu_khz = hpet_calibrate_tsc();
+		timename = "HPET";
+	} else {
+		pit_init();
+		cpu_khz = pit_calibrate_tsc();
+		timename = "PIT";
+	}
+
+	printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
+	       vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
 		cpu_khz / 1000, cpu_khz % 1000);
-	hpet.ticks = cpu_khz * (1000 / HZ);
-	rdtscll(hpet.last_tsc);
+	vxtime.mode = VXTIME_TSC;
+	vxtime.quot = (1000000L << 32) / vxtime_hz;
+	vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+	vxtime.hz = vxtime_hz;
+	rdtscll_sync(&vxtime.last_tsc);
 	setup_irq(0, &irq0);
+}
+
+void __init time_init_smp(void)
+{
+	char *timetype;
+
+	if (vxtime.hpet_address) {
+		timetype = "HPET";
+		vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+		vxtime.mode = VXTIME_HPET;
+		do_gettimeoffset = do_gettimeoffset_hpet;
+	} else {
+		timetype = "PIT/TSC";
+		vxtime.mode = VXTIME_TSC;
+	}
+	printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
 }
 
 __setup("report_lost_ticks", time_setup);
diff -Nru a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
--- a/arch/x86_64/kernel/vsyscall.c	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/kernel/vsyscall.c	Wed Jun 11 13:08:09 2003
@@ -33,7 +33,7 @@
  *
  * Add HPET support (port from 2.4). Still needed?
  * Nop out vsyscall syscall to avoid anchor for buffer overflows when sysctl off.
- * 
+ *
  * These are not urgent things that we need to address only before shipping the first
  * production binary kernels.
  */
@@ -77,14 +77,22 @@
 
 	do {
 		sequence = read_seqbegin(&__xtime_lock);
-		
-		sync_core();
-		rdtscll(t);
+
 		sec = __xtime.tv_sec;
 		usec = (__xtime.tv_nsec / 1000) +
-			(__jiffies - __wall_jiffies) * (1000000 / HZ) +
-			(t  - __hpet.last_tsc) * (1000000 / HZ) / __hpet.ticks + __hpet.offset;
+			(__jiffies - __wall_jiffies) * (1000000 / HZ);
 
+		if (__vxtime.mode == VXTIME_TSC) {
+			sync_core();
+			rdtscll(t);
+			usec += ((t - __vxtime.last_tsc) *
+				 __vxtime.tsc_quot) >> 32;
+		} else {
+#if 0
+			usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+				  __vxtime.last) * __vxtime.quot) >> 32;
+#endif
+		}
 	} while (read_seqretry(&__xtime_lock, sequence));
 
 	tv->tv_sec = sec + usec / 1000000;
@@ -100,7 +108,7 @@
 static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	int ret;
-	asm volatile("syscall" 
+	asm volatile("syscall"
 		: "=a" (ret)
 		: "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
 	return ret;
@@ -109,7 +117,7 @@
 static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 {
 	if (unlikely(!__sysctl_vsyscall))
-	return gettimeofday(tv,tz); 
+	return gettimeofday(tv,tz);
 	if (tv)
 		do_vgettimeofday(tv);
 	if (tz)
@@ -119,13 +127,13 @@
 
 static time_t __vsyscall(1) vtime(time_t * t)
 {
-	struct timeval tv; 
+	struct timeval tv;
 	if (unlikely(!__sysctl_vsyscall))
 		gettimeofday(&tv, NULL);
 	else
 		do_vgettimeofday(&tv);
 	if (t)
-		*t = tv.tv_sec; 
+		*t = tv.tv_sec;
 	return tv.tv_sec;
 }
 
diff -Nru a/arch/x86_64/vmlinux.lds.S b/arch/x86_64/vmlinux.lds.S
--- a/arch/x86_64/vmlinux.lds.S	Wed Jun 11 13:08:09 2003
+++ b/arch/x86_64/vmlinux.lds.S	Wed Jun 11 13:08:09 2003
@@ -50,10 +50,10 @@
   .xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
   xtime_lock = LOADADDR(.xtime_lock);
   . = ALIGN(16);
-  .hpet : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.hpet) }
-  hpet = LOADADDR(.hpet);
+  .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) }
+  vxtime = LOADADDR(.vxtime);
   . = ALIGN(16);
-  .wall_jiffies : AT ((LOADADDR(.hpet) + SIZEOF(.hpet) + 15) & ~(15)) { *(.wall_jiffies) }
+  .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) }
   wall_jiffies = LOADADDR(.wall_jiffies);
   . = ALIGN(16);
   .sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
diff -Nru a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
--- a/include/asm-x86_64/fixmap.h	Wed Jun 11 13:08:09 2003
+++ b/include/asm-x86_64/fixmap.h	Wed Jun 11 13:08:09 2003
@@ -35,6 +35,8 @@
 enum fixed_addresses {
 	VSYSCALL_LAST_PAGE,
 	VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+	VSYSCALL_HPET,
+	FIX_HPET_BASE,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
diff -Nru a/include/asm-x86_64/mc146818rtc.h b/include/asm-x86_64/mc146818rtc.h
--- a/include/asm-x86_64/mc146818rtc.h	Wed Jun 11 13:08:09 2003
+++ b/include/asm-x86_64/mc146818rtc.h	Wed Jun 11 13:08:09 2003
@@ -24,6 +24,11 @@
 outb_p((val),RTC_PORT(1)); \
 })
 
+#ifndef CONFIG_HPET_TIMER
 #define RTC_IRQ 8
+#else
+/* Temporary workaround due to IRQ routing problem. */
+#define RTC_IRQ 0
+#endif
 
 #endif /* _ASM_MC146818RTC_H */
diff -Nru a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
--- a/include/asm-x86_64/timex.h	Wed Jun 11 13:08:09 2003
+++ b/include/asm-x86_64/timex.h	Wed Jun 11 13:08:09 2003
@@ -30,6 +30,34 @@
 
 extern unsigned int cpu_khz;
 
-extern struct hpet_data hpet;
+/*
+ * Documentation on HPET can be found at:
+ *      http://www.intel.com/ial/home/sp/pcmmspec.htm
+ *      ftp://download.intel.com/ial/home/sp/mmts098.pdf
+ */
+
+#define HPET_ID		0x000
+#define HPET_PERIOD	0x004
+#define HPET_CFG	0x010
+#define HPET_STATUS	0x020
+#define HPET_COUNTER	0x0f0
+#define HPET_T0_CFG	0x100
+#define HPET_T0_CMP	0x108
+#define HPET_T0_ROUTE	0x110
+
+#define HPET_ID_VENDOR	0xffff0000
+#define HPET_ID_LEGSUP	0x00008000
+#define HPET_ID_NUMBER	0x00000f00
+#define HPET_ID_REV	0x000000ff
+
+#define HPET_CFG_ENABLE	0x001
+#define HPET_CFG_LEGACY	0x002
+
+#define HPET_T0_ENABLE		0x004
+#define HPET_T0_PERIODIC	0x008
+#define HPET_T0_SETVAL		0x040
+#define HPET_T0_32BIT		0x100
+
+extern struct vxtime_data vxtime;
 
 #endif
diff -Nru a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
--- a/include/asm-x86_64/vsyscall.h	Wed Jun 11 13:08:09 2003
+++ b/include/asm-x86_64/vsyscall.h	Wed Jun 11 13:08:09 2003
@@ -15,7 +15,7 @@
 
 #ifdef __KERNEL__
 
-#define __section_hpet __attribute__ ((unused, __section__ (".hpet"), aligned(16)))
+#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
 #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
 #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
 #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -23,22 +23,24 @@
 #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
 #define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES)))
 
+#define VXTIME_TSC	1
+#define VXTIME_HPET	2
 
-struct hpet_data {
-	long address;		/* base address */
+struct vxtime_data {
+	long hpet_address;	/* HPET base address */
 	unsigned long hz;	/* HPET clocks / sec */
-	int trigger;		/* value at last interrupt */
 	int last;
-	int offset;
 	unsigned long last_tsc;
-	long ticks;
+	long quot;
+	long tsc_quot;
+	int mode;
 };
 
 #define hpet_readl(a)           readl(fix_to_virt(FIX_HPET_BASE) + a)
 #define hpet_writel(d,a)        writel(d, fix_to_virt(FIX_HPET_BASE) + a)
 
 /* vsyscall space (readonly) */
-extern struct hpet_data __hpet;
+extern struct vxtime_data __vxtime;
 extern struct timespec __xtime;
 extern volatile unsigned long __jiffies;
 extern unsigned long __wall_jiffies;
@@ -46,7 +48,7 @@
 extern seqlock_t __xtime_lock;
 
 /* kernel space (writeable) */
-extern struct hpet_data hpet;
+extern struct vxtime_data vxtime;
 extern unsigned long wall_jiffies;
 extern struct timezone sys_tz;
 extern int sysctl_vsyscall;

  reply	other threads:[~2003-06-11 19:58 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-06-11 18:50 [PATCH] New x86_64 time code for 2.5.70 Bryan O'Sullivan
2003-06-11 19:18 ` Andi Kleen
2003-06-11 19:56   ` Bryan O'Sullivan
2003-06-11 20:10     ` Bryan O'Sullivan [this message]
2003-06-11 21:23       ` john stultz
2003-06-11 21:36         ` Bryan O'Sullivan
2003-06-11 21:41           ` john stultz
2003-06-12  0:33           ` john stultz
2003-06-12  6:40             ` Vojtech Pavlik
2003-06-12 17:47             ` Bryan O'Sullivan
2003-06-12 19:39               ` john stultz
2003-06-12 19:55                 ` Vojtech Pavlik
2003-06-12 21:16                 ` Bryan O'Sullivan
2003-06-13  1:43                   ` john stultz
2003-06-12 19:48               ` Vojtech Pavlik
2003-06-11 20:41 ` Vojtech Pavlik
2003-06-11 20:44 ` john stultz
2003-06-11 21:32 ` Mika Penttilä
2003-06-11 21:28   ` Bryan O'Sullivan
2003-06-12  4:40     ` [discuss] " Andreas Jaeger
2003-06-12  6:42     ` Vojtech Pavlik
2003-06-12  6:47   ` Vojtech Pavlik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1055362249.17154.86.camel@serpentine.internal.keyresearch.com \
    --to=bos@serpentine.com \
    --cc=ak@suse.de \
    --cc=discuss@x86-64.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=vojtech@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.