All of lore.kernel.org
 help / color / mirror / Atom feed
From: john stultz <johnstul@us.ibm.com>
To: marcelo <marcelo@conectiva.com.br>, lkml <linux-kernel@vger.kernel.org>
Cc: Leah Cunningham <leahc@us.ibm.com>,
	wilhelm.nuesser@sap.com, paramjit@us.ibm.com
Subject: [PATCH] tsc-disable_B7
Date: 05 Aug 2002 15:58:07 -0700	[thread overview]
Message-ID: <1028588288.1073.42.camel@cog> (raw)

[-- Attachment #1: Type: text/plain, Size: 854 bytes --]

Marcelo,
	This patch enables a workaround for multi-node NUMA systems that are
experiencing gettimeofday returning "old" time values. Because these
systems are frequently driven by different crystals, the CPUs vary
slightly in frequency causing the TSCs to drift apart. Thus it is
possible for gettimeofday to return time values behind time values
already seen on another cpu. This patch allows people compiling w/
'Multi-node NUMA support' to pass "notsc" or "bad-tsc" as boot
parameters. "notsc" disables rdtsc calls, and forces the kernel to use
the PIT for gettimeofday calucluations (as normally expected w/ i386
compiled kernels). While "bad-tsc" forces the kernel to use the PIT for
gettimeofday, but does not disable TSC access. 

Thanks to Matt Wilson for suggestions on this revision.

Comments, suggestions and flames welcome.

thanks
-john



[-- Attachment #2: linux-2.4.19_tsc-disable_B7.patch --]
[-- Type: text/x-patch, Size: 6507 bytes --]

diff -Nru a/Documentation/Configure.help b/Documentation/Configure.help
--- a/Documentation/Configure.help	Mon Aug  5 15:41:40 2002
+++ b/Documentation/Configure.help	Mon Aug  5 15:41:40 2002
@@ -233,7 +233,21 @@
   network and embedded applications.  For more information see the
   Axis Communication site, <http://developer.axis.com/>.
 
-Multiquad support for NUMA systems
+Multi-node support for NUMA systems
+CONFIG_X86_NUMA
+  This option is used for getting Linux to run on a NUMA multi-node box. 
+  Because multi-node systems suffer from unsynced TSCs, as well as TSC 
+  drift, which can cause gettimeofday to return non-monotonic values, 
+  this option will turn off the CONFIG_X86_TSC optimization. This 
+  allows you to then specify "bad-tsc" as a boot option to force all nodes 
+  to use the PIT for gettimeofday. 
+  
+  Note: This does not disable access to the unsynced TSCs from userspace, 
+  thus applications using the rdtsc instruction for timing may have 
+  issues. To disable userspace access, instead of "bad-tsc" use the 
+  "notsc" boot option.
+  
+Multiquad support for NUMAQ systems
 CONFIG_MULTIQUAD
   This option is used for getting Linux to run on a (IBM/Sequent) NUMA 
   multiquad box. This changes the way that processors are bootstrapped,
diff -Nru a/arch/i386/config.in b/arch/i386/config.in
--- a/arch/i386/config.in	Mon Aug  5 15:41:40 2002
+++ b/arch/i386/config.in	Mon Aug  5 15:41:40 2002
@@ -80,20 +80,20 @@
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
    define_bool CONFIG_X86_USE_STRING_486 y
    define_bool CONFIG_X86_ALIGNMENT_16 y
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_PPRO_FENCE y
 fi
 if [ "$CONFIG_M586MMX" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
    define_bool CONFIG_X86_USE_STRING_486 y
    define_bool CONFIG_X86_ALIGNMENT_16 y
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_PPRO_FENCE y
 fi
 if [ "$CONFIG_M686" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_PGE y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
@@ -101,14 +101,14 @@
 fi
 if [ "$CONFIG_MPENTIUMIII" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_PGE y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
 fi
 if [ "$CONFIG_MPENTIUM4" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 7
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_PGE y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
@@ -116,12 +116,12 @@
 if [ "$CONFIG_MK6" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
    define_bool CONFIG_X86_ALIGNMENT_16 y
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
 fi
 if [ "$CONFIG_MK7" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 6
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_USE_3DNOW y
    define_bool CONFIG_X86_PGE y
@@ -134,14 +134,14 @@
 fi
 if [ "$CONFIG_MCYRIXIII" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_ALIGNMENT_16 y
    define_bool CONFIG_X86_USE_3DNOW y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
 fi
 if [ "$CONFIG_MCRUSOE" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
 fi
 if [ "$CONFIG_MWINCHIPC6" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
@@ -152,14 +152,14 @@
 if [ "$CONFIG_MWINCHIP2" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
    define_bool CONFIG_X86_ALIGNMENT_16 y
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
    define_bool CONFIG_X86_OOSTORE y
 fi
 if [ "$CONFIG_MWINCHIP3D" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
    define_bool CONFIG_X86_ALIGNMENT_16 y
-   define_bool CONFIG_X86_TSC y
+   define_bool CONFIG_X86_HAS_TSC y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
    define_bool CONFIG_X86_OOSTORE y
 fi
@@ -198,8 +198,20 @@
       define_bool CONFIG_X86_IO_APIC y
    fi
 else
-   bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
+	bool 'Multi-node NUMA system support' CONFIG_X86_NUMA
+	if [ "$CONFIG_X86_NUMA" = "y" ]; then
+		bool 'Multiquad (IBM/Sequent) NUMAQ support' CONFIG_MULTIQUAD
+	fi
 fi
+
+if [ "$CONFIG_X86_HAS_TSC" = "y" ]; then
+	if [ "$CONFIG_X86_NUMA" = "y" ]; then
+		define_bool CONFIG_X86_TSC n
+	else
+		define_bool CONFIG_X86_TSC y
+	fi
+fi
+
 
 if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
    define_bool CONFIG_HAVE_DEC_LOCK y
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c	Mon Aug  5 15:41:40 2002
+++ b/arch/i386/kernel/time.c	Mon Aug  5 15:41:40 2002
@@ -569,6 +569,15 @@
 #define CALIBRATE_LATCH	(5 * LATCH)
 #define CALIBRATE_TIME	(5 * 1000020/HZ)
 
+static int  bad_tsc __initdata = 0;
+static int __init bad_tsc_setup(char *str)
+{
+	bad_tsc = 1;
+	return 1;
+}
+__setup("bad-tsc", bad_tsc_setup);
+
+
 static unsigned long __init calibrate_tsc(void)
 {
        /* Set the Gate high, disable speaker */
@@ -672,16 +681,17 @@
 		unsigned long tsc_quotient = calibrate_tsc();
 		if (tsc_quotient) {
 			fast_gettimeoffset_quotient = tsc_quotient;
-			use_tsc = 1;
 			/*
 			 *	We could be more selective here I suspect
 			 *	and just enable this for the next intel chips ?
 			 */
-			x86_udelay_tsc = 1;
-#ifndef do_gettimeoffset
-			do_gettimeoffset = do_fast_gettimeoffset;
-#endif
-
+			if(!bad_tsc){
+				use_tsc = 1;
+				x86_udelay_tsc = 1;
+				#ifndef do_gettimeoffset
+				do_gettimeoffset = do_fast_gettimeoffset;
+				#endif
+			}
 			/* report CPU clock rate in Hz.
 			 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
 			 * clock/second. Our precision is about 100 ppm.

             reply	other threads:[~2002-08-05 23:08 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-08-05 22:58 john stultz [this message]
2002-08-05 23:21 ` [PATCH] tsc-disable_B7 Christoph Hellwig
2002-08-06  0:04   ` john stultz
2002-08-06  3:04     ` Martin J. Bligh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1028588288.1073.42.camel@cog \
    --to=johnstul@us.ibm.com \
    --cc=leahc@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcelo@conectiva.com.br \
    --cc=paramjit@us.ibm.com \
    --cc=wilhelm.nuesser@sap.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.