public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] i386 x86-64 Eliminate Local APIC timer interrupt
@ 2005-04-30  0:26 Venkatesh Pallipadi
  2005-04-30  0:46 ` Zwane Mwaikambo
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Venkatesh Pallipadi @ 2005-04-30  0:26 UTC (permalink / raw)
  To: Andrew Morton, Linus Torvalds, mingo, linux-kernel
  Cc: Rajesh Shah, John Stultz, Andi Kleen, Asit K Mallick


Background: 
Local APIC timer stops functioning when CPU is in C3 state. As a
result the local APIC timer interrupt will fire at uncertain times, depending
on how long we spend in C3 state. This has two side effects:
* Idle balancing will not happen as we expect it to.
* Kernel statistics for idle time will not be proper (as we get fewer LAPIC
  interrupts when we are idle). This can confuse other parts of the
  kernel (like the ondemand cpufreq governor) which depend on these idle stats.


Proposed Fix: 
Attached is a prototype patch, that tries to eliminate the dependency on 
local APIC timer for update_process_times(). The patch gets rid of Local APIC 
timer altogether. We use the timer interrupt (IRQ 0) configured in 
broadcast mode in IOAPIC instead (Doesn't work with 8259). 
As changing anything related to basic timer interrupt is a little bit risky, 
I have a boot parameter currently ("useapictimer") to switch back to original 
local APIC timer way of doing things.

This may seem like overkill to solve this particular problem. But, I feel
it simplifies things and will have other advantages:
* Should help dynamic tick, as one has to change only the global timer interrupt 
  frequency with varying jiffies.
* Reduces one interrupt per jiffy. 
* One less interrupt source to worry about.


The patch handles i386 and x86-64.

BEFORE:
/proc/interrupts at random time
lcoyote1-32-SLES9:~ # cat /proc/interrupts 
           CPU0       CPU1       CPU2       CPU3       
  0:      32615      32194      32159      32166    IO-APIC-edge  timer
   :
   :
LOC:     128584     128699     128585     128699 
   :

AFTER:
/proc/interrupts at random time
lcoyote1-32-SLES9:~ # cat /proc/interrupts 
           CPU0       CPU1       CPU2       CPU3       
  0:     719237     718797     718913     718911    IO-APIC-edge  timer
   :
   :
LOC:          0          0          0          0 
   :


TBD: 
* This is tested in normal i386 and x86-64 hardware. I think that this scheme 
  should work on NUMAQ and other subarchitectures as well. But, haven't really 
  tested it on such hardware.


Comments/concerns?

Thanks,
Venki

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>

diff -purN linux-2.6.12-rc2-mm3/arch/i386/kernel/apic.c linux-2.6.12-rc2-mm3-new/arch/i386/kernel/apic.c
--- linux-2.6.12-rc2-mm3/arch/i386/kernel/apic.c	2005-04-30 07:20:19.500689424 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/i386/kernel/apic.c	2005-04-30 07:24:37.064533768 -0700
@@ -707,6 +707,13 @@ static void apic_pm_activate(void) { }
  * Original code written by Keir Fraser.
  */
 
+static __init int setup_apictimer(char *str)
+{
+	using_apic_timer = 1;
+	return 0;
+}
+__setup("useapictimer", setup_apictimer);
+
 static int __init apic_set_verbosity(char *str)
 {
 	if (strcmp("debug", str) == 0)
@@ -1051,8 +1058,19 @@ static unsigned int calibration_result;
 
 void __init setup_boot_APIC_clock(void)
 {
+	/*
+	 * Special case: If we were not able to setup IOAPIC timer interrupt
+	 * to broadcast mode on an SMP capable system, then we have to use
+	 * local apic timer...
+	 */
+	if (!using_apic_timer && !timer_broadcast && (num_possible_cpus() > 1))
+		using_apic_timer = 1;
+
+	if (!using_apic_timer) {
+		apic_printk(APIC_VERBOSE, "Disabling APIC timer\n");
+		return;
+	}
 	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
-	using_apic_timer = 1;
 
 	local_irq_disable();
 
@@ -1154,9 +1172,7 @@ inline void smp_local_timer_interrupt(st
 						per_cpu(prof_counter, cpu);
 		}
 
-#ifdef CONFIG_SMP
 		update_process_times(user_mode(regs));
-#endif
 	}
 
 	/*
diff -purN linux-2.6.12-rc2-mm3/arch/i386/kernel/io_apic.c linux-2.6.12-rc2-mm3-new/arch/i386/kernel/io_apic.c
--- linux-2.6.12-rc2-mm3/arch/i386/kernel/io_apic.c	2005-04-30 07:20:19.514687296 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/i386/kernel/io_apic.c	2005-04-30 07:24:37.066533464 -0700
@@ -84,6 +84,8 @@ int vector_irq[NR_VECTORS] = { [0 ... NR
 #define vector_to_irq(vector)	(vector)
 #endif
 
+int timer_broadcast;
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -221,6 +223,21 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+static void __init setup_IO_APIC_timer_broadcast(int pin)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	*(((int*)&entry) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+	*(((int*)&entry) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+	entry.delivery_mode = dest_Fixed;
+	entry.dest.logical.logical_dest = 0xff;
+	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 {
 	unsigned long flags;
@@ -228,6 +245,9 @@ static void set_ioapic_affinity_irq(unsi
 	struct irq_pin_list *entry = irq_2_pin + irq;
 	unsigned int apicid_value;
 	
+	if (irq_desc[irq].status & IRQ_PER_CPU)
+		return;
+
 	apicid_value = cpu_mask_to_apicid(cpumask);
 	/* Prepare to do the io_apic_write */
 	apicid_value = apicid_value << 24;
@@ -2207,6 +2227,11 @@ static inline void check_timer(void)
 				setup_nmi();
 				enable_8259A_irq(0);
 			}
+			if (!using_apic_timer) {
+				timer_broadcast = 1;
+				irq_desc[0].status |= IRQ_PER_CPU;
+				setup_IO_APIC_timer_broadcast(pin1);
+			}
 			return;
 		}
 		clear_IO_APIC_pin(0, pin1);
diff -purN linux-2.6.12-rc2-mm3/arch/i386/kernel/nmi.c linux-2.6.12-rc2-mm3-new/arch/i386/kernel/nmi.c
--- linux-2.6.12-rc2-mm3/arch/i386/kernel/nmi.c	2005-04-30 07:20:19.520686384 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/i386/kernel/nmi.c	2005-04-30 07:25:43.341458144 -0700
@@ -504,7 +504,11 @@ void nmi_watchdog_tick (struct pt_regs *
 	 */
 	int sum, cpu = smp_processor_id();
 
-	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+	if (using_apic_timer)
+		sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+	else
+		sum = kstat_cpu(cpu).irqs[0];
+
 
 #ifdef CONFIG_KGDB
 	if (!in_kgdb(regs) && last_irq_sums[cpu] == sum) {
diff -purN linux-2.6.12-rc2-mm3/arch/i386/kernel/time.c linux-2.6.12-rc2-mm3-new/arch/i386/kernel/time.c
--- linux-2.6.12-rc2-mm3/arch/i386/kernel/time.c	2005-04-30 07:20:19.526685472 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/i386/kernel/time.c	2005-04-30 07:24:37.067533312 -0700
@@ -297,14 +297,19 @@ irqreturn_t timer_interrupt(int irq, voi
 	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
 	 * the irq version of write_lock because as just said we have irq
 	 * locally disabled. -arca
+	 * When timer interrupt is broadcast CPU0 becomes our timekeeper CPU
+	 * Side effect: CPU0 cannot be hot added/removed
 	 */
-	write_seqlock(&xtime_lock);
+	if (using_apic_timer || smp_processor_id() == 0) {
+		write_seqlock(&xtime_lock);
+		cur_timer->mark_offset();
+		do_timer_interrupt(irq, NULL, regs);
+		write_sequnlock(&xtime_lock);
+	}
 
-	cur_timer->mark_offset();
- 
-	do_timer_interrupt(irq, NULL, regs);
+	/* We don't need to grab xtime lock to handle per cpu schedule, etc */
+	do_timer_interrupt_hook_percpu(regs);
 
-	write_sequnlock(&xtime_lock);
 	return IRQ_HANDLED;
 }
 
diff -purN linux-2.6.12-rc2-mm3/arch/x86_64/kernel/apic.c linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/apic.c
--- linux-2.6.12-rc2-mm3/arch/x86_64/kernel/apic.c	2005-04-30 07:20:19.631669512 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/apic.c	2005-04-30 07:24:37.068533160 -0700
@@ -36,8 +36,6 @@
 
 int apic_verbosity;
 
-int disable_apic_timer __initdata;
-
 /* Using APIC to generate smp_local_timer_interrupt? */
 int using_apic_timer = 0;
 
@@ -790,13 +788,20 @@ static unsigned int calibration_result;
 
 void __init setup_boot_APIC_clock (void)
 {
-	if (disable_apic_timer) { 
+	/*
+	 * Special case: If we were not able to setup IOAPIC timer interrupt
+	 * to broadcast mode on an SMP capable system, then we have to use
+	 * local apic timer...
+	 */
+	if (!using_apic_timer && !timer_broadcast && (num_possible_cpus() > 1))
+		using_apic_timer = 1;
+
+	if (!using_apic_timer) { 
 		printk(KERN_INFO "Disabling APIC timer\n"); 
 		return; 
 	} 
 
 	printk(KERN_INFO "Using local APIC timer interrupts.\n");
-	using_apic_timer = 1;
 
 	local_irq_disable();
 
@@ -899,9 +904,7 @@ void smp_local_timer_interrupt(struct pt
 				per_cpu(prof_counter, cpu);
 		}
 
-#ifdef CONFIG_SMP
 		update_process_times(user_mode(regs));
-#endif
 	}
 
 	/*
@@ -1108,9 +1111,9 @@ static __init int setup_nolapic(char *st
 	return 0;
 } 
 
-static __init int setup_noapictimer(char *str) 
+static __init int setup_apictimer(char *str) 
 { 
-	disable_apic_timer = 1;
+	using_apic_timer = 1;
 	return 0;
 } 
 
@@ -1119,6 +1122,6 @@ static __init int setup_noapictimer(char
 __setup("disableapic", setup_disableapic); 
 __setup("nolapic", setup_nolapic);  /* same as disableapic, for compatibility */
 
-__setup("noapictimer", setup_noapictimer); 
+__setup("useapictimer", setup_apictimer); 
 
 /* no "lapic" flag - we only use the lapic when the BIOS tells us so. */
diff -purN linux-2.6.12-rc2-mm3/arch/x86_64/kernel/io_apic.c linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/io_apic.c
--- linux-2.6.12-rc2-mm3/arch/x86_64/kernel/io_apic.c	2005-04-30 07:20:19.636668752 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/io_apic.c	2005-04-30 07:24:37.070532856 -0700
@@ -75,6 +75,8 @@ int vector_irq[NR_VECTORS] = { [0 ... NR
 #define vector_to_irq(vector)	(vector)
 #endif
 
+int timer_broadcast;
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -179,6 +181,21 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+static void __init setup_IO_APIC_timer_broadcast(int pin)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	*(((int*)&entry) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+	*(((int*)&entry) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+	entry.delivery_mode = dest_Fixed;
+	entry.dest.logical.logical_dest = 0xff;
+	io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+	io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  * specific CPU-side IRQs.
@@ -1349,6 +1366,9 @@ static void set_ioapic_affinity_irq(unsi
 	unsigned long flags;
 	unsigned int dest;
 
+	if (irq_desc[irq].status & IRQ_PER_CPU)
+		return;
+
 	dest = cpu_mask_to_apicid(mask);
 
 	/*
@@ -1640,6 +1660,11 @@ static inline void check_timer(void)
 				setup_nmi();
 				enable_8259A_irq(0);
 			}
+			if (!using_apic_timer) {
+				timer_broadcast = 1;
+				irq_desc[0].status |= IRQ_PER_CPU;
+				setup_IO_APIC_timer_broadcast(pin1);
+			}
 			return;
 		}
 		clear_IO_APIC_pin(0, pin1);
diff -purN linux-2.6.12-rc2-mm3/arch/x86_64/kernel/nmi.c linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/nmi.c
--- linux-2.6.12-rc2-mm3/arch/x86_64/kernel/nmi.c	2005-04-30 07:20:19.643667688 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/nmi.c	2005-04-30 07:24:37.070532856 -0700
@@ -389,7 +389,11 @@ void nmi_watchdog_tick (struct pt_regs *
 	int sum, cpu;
 
 	cpu = safe_smp_processor_id();
-	sum = read_pda(apic_timer_irqs);
+	if (using_apic_timer)
+		sum = read_pda(apic_timer_irqs);
+	else
+		sum = kstat_cpu(cpu).irqs[0];
+
 	if (last_irq_sums[cpu] == sum) {
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
diff -purN linux-2.6.12-rc2-mm3/arch/x86_64/kernel/time.c linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/time.c
--- linux-2.6.12-rc2-mm3/arch/x86_64/kernel/time.c	2005-04-30 07:20:19.650666624 -0700
+++ linux-2.6.12-rc2-mm3-new/arch/x86_64/kernel/time.c	2005-04-30 07:24:37.071532704 -0700
@@ -358,7 +358,7 @@ static noinline void handle_lost_ticks(i
 #endif
 }
 
-static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	static unsigned long rtc_update = 0;
 	unsigned long tsc;
@@ -423,27 +423,10 @@ static irqreturn_t timer_interrupt(int i
 		jiffies += lost;
 	}
 
-/*
- * Do the timer stuff.
- */
-
+	/*
+	 * Do the timer stuff.
+	 */
 	do_timer(regs);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(regs));
-#endif
-
-/*
- * In the SMP case we use the local APIC timer interrupt to do the profiling,
- * except when we simulate SMP mode on a uniprocessor system, in that case we
- * have to call the local interrupt handler.
- */
-
-#ifndef CONFIG_X86_LOCAL_APIC
-	profile_tick(CPU_PROFILING, regs);
-#else
-	if (!using_apic_timer)
-		smp_local_timer_interrupt(regs);
-#endif
 
 /*
  * If we have an externally synchronized Linux clock, then update CMOS clock
@@ -461,6 +444,31 @@ static irqreturn_t timer_interrupt(int i
  
 	write_sequnlock(&xtime_lock);
 
+	return;
+}
+
+static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	/*
+	 * CPU0 becomes our timekeeper CPU
+	 * Side effect: CPU0 cannot be hot added/removed
+	 */
+	if (using_apic_timer || smp_processor_id() == 0)
+		do_timer_interrupt(irq, dev_id, regs);
+
+        /*
+	 * In case we are using local APIC timer interrupt these calls
+	 * will be done there.
+	 */
+#ifndef CONFIG_X86_LOCAL_APIC
+	update_process_times(user_mode(regs));
+	profile_tick(CPU_PROFILING, regs);
+#else
+	if (!using_apic_timer) {
+		update_process_times(user_mode(regs));
+		profile_tick(CPU_PROFILING, regs);
+	}
+#endif
 	return IRQ_HANDLED;
 }
 
diff -purN linux-2.6.12-rc2-mm3/include/asm-i386/io_apic.h linux-2.6.12-rc2-mm3-new/include/asm-i386/io_apic.h
--- linux-2.6.12-rc2-mm3/include/asm-i386/io_apic.h	2005-03-01 23:38:13.000000000 -0800
+++ linux-2.6.12-rc2-mm3-new/include/asm-i386/io_apic.h	2005-04-30 07:24:37.072532552 -0700
@@ -13,6 +13,8 @@
 
 #ifdef CONFIG_X86_IO_APIC
 
+extern int timer_broadcast;
+
 #ifdef CONFIG_PCI_MSI
 static inline int use_pci_vector(void)	{return 1;}
 static inline void disable_edge_ioapic_vector(unsigned int vector) { }
diff -purN linux-2.6.12-rc2-mm3/include/asm-i386/mach-default/do_timer.h linux-2.6.12-rc2-mm3-new/include/asm-i386/mach-default/do_timer.h
--- linux-2.6.12-rc2-mm3/include/asm-i386/mach-default/do_timer.h	2005-03-01 23:38:26.000000000 -0800
+++ linux-2.6.12-rc2-mm3-new/include/asm-i386/mach-default/do_timer.h	2005-04-30 07:24:37.072532552 -0700
@@ -16,23 +16,36 @@
 static inline void do_timer_interrupt_hook(struct pt_regs *regs)
 {
 	do_timer(regs);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(regs));
-#endif
-/*
- * In the SMP case we use the local APIC timer interrupt to do the
- * profiling, except when we simulate SMP mode on a uniprocessor
- * system, in that case we have to call the local interrupt handler.
- */
+}
+
+
+/**
+ * do_timer_interrupt_hook_percpu - hook into timer tick for each cpu
+ * @regs:	standard registers from interrupt
+ *
+ * Description:
+ *	It's primary purpose is to allow architectures that don't use
+ *	individual per CPU clocks (like the CPU APICs supply) to handle
+ *	timer interrupt as a means of triggering reschedules etc.
+ **/
+
+static inline void do_timer_interrupt_hook_percpu(struct pt_regs *regs)
+{
+	/*
+	 * In case we are using local APIC timer interrupt these calls
+	 * will be done there.
+	 */
 #ifndef CONFIG_X86_LOCAL_APIC
+	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING, regs);
 #else
-	if (!using_apic_timer)
-		smp_local_timer_interrupt(regs);
+	if (!using_apic_timer) {
+		update_process_times(user_mode(regs));
+		profile_tick(CPU_PROFILING, regs);
+	}
 #endif
 }
 
-
 /* you can safely undefine this if you don't have the Neptune chipset */
 
 #define BUGGY_NEPTUN_TIMER
diff -purN linux-2.6.12-rc2-mm3/include/asm-i386/mach-visws/do_timer.h linux-2.6.12-rc2-mm3-new/include/asm-i386/mach-visws/do_timer.h
--- linux-2.6.12-rc2-mm3/include/asm-i386/mach-visws/do_timer.h	2005-03-01 23:37:53.000000000 -0800
+++ linux-2.6.12-rc2-mm3-new/include/asm-i386/mach-visws/do_timer.h	2005-04-30 07:24:37.072532552 -0700
@@ -9,15 +9,13 @@ static inline void do_timer_interrupt_ho
 	co_cpu_write(CO_CPU_STAT,co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR);
 
 	do_timer(regs);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(regs));
-#endif
 /*
  * In the SMP case we use the local APIC timer interrupt to do the
  * profiling, except when we simulate SMP mode on a uniprocessor
  * system, in that case we have to call the local interrupt handler.
  */
 #ifndef CONFIG_X86_LOCAL_APIC
+	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING, regs);
 #else
 	if (!using_apic_timer)
@@ -25,6 +23,17 @@ static inline void do_timer_interrupt_ho
 #endif
 }
 
+/**
+ * do_timer_interrupt_hook_percpu - hook into timer tick for each cpu
+ * @regs:	standard registers from interrupt
+ *
+ * Description:
+ *	It's primary purpose is to allow architectures that don't use
+ *	individual per CPU clocks (like the CPU APICs supply) to handle
+ *	timer interrupt as a means of triggering reschedules etc.
+ **/
+static inline void do_timer_interrupt_hook_percpu(struct pt_regs *regs) {}
+
 static inline int do_timer_overflow(int count)
 {
 	int i;
diff -purN linux-2.6.12-rc2-mm3/include/asm-i386/mach-voyager/do_timer.h linux-2.6.12-rc2-mm3-new/include/asm-i386/mach-voyager/do_timer.h
--- linux-2.6.12-rc2-mm3/include/asm-i386/mach-voyager/do_timer.h	2005-03-01 23:38:17.000000000 -0800
+++ linux-2.6.12-rc2-mm3-new/include/asm-i386/mach-voyager/do_timer.h	2005-04-30 07:24:37.073532400 -0700
@@ -11,6 +11,17 @@ static inline void do_timer_interrupt_ho
 	voyager_timer_interrupt(regs);
 }
 
+/**
+ * do_timer_interrupt_hook_percpu - hook into timer tick for each cpu
+ * @regs:	standard registers from interrupt
+ *
+ * Description:
+ *	It's primary purpose is to allow architectures that don't use
+ *	individual per CPU clocks (like the CPU APICs supply) to handle
+ *	timer interrupt as a means of triggering reschedules etc.
+ **/
+static inline void do_timer_interrupt_hook_percpu(struct pt_regs *regs) {}
+
 static inline int do_timer_overflow(int count)
 {
 	/* can't read the ISR, just assume 1 tick
diff -purN linux-2.6.12-rc2-mm3/include/asm-x86_64/io_apic.h linux-2.6.12-rc2-mm3-new/include/asm-x86_64/io_apic.h
--- linux-2.6.12-rc2-mm3/include/asm-x86_64/io_apic.h	2005-03-01 23:37:49.000000000 -0800
+++ linux-2.6.12-rc2-mm3-new/include/asm-x86_64/io_apic.h	2005-04-30 07:24:37.073532400 -0700
@@ -13,6 +13,8 @@
 
 #ifdef CONFIG_X86_IO_APIC
 
+extern int timer_broadcast;
+
 #ifdef CONFIG_PCI_MSI
 static inline int use_pci_vector(void)	{return 1;}
 static inline void disable_edge_ioapic_vector(unsigned int vector) { }

^ permalink raw reply	[flat|nested] 22+ messages in thread
* RE: [RFC][PATCH] i386 x86-64 Eliminate Local APIC timer interrupt
@ 2005-04-30  2:43 Pallipadi, Venkatesh
  2005-05-05  4:16 ` Len Brown
  0 siblings, 1 reply; 22+ messages in thread
From: Pallipadi, Venkatesh @ 2005-04-30  2:43 UTC (permalink / raw)
  To: Zwane Mwaikambo
  Cc: Andrew Morton, Linus Torvalds, mingo, linux-kernel, Shah, Rajesh,
	John Stultz, Andi Kleen, Mallick, Asit K

 

>-----Original Message-----
>From: Zwane Mwaikambo [mailto:zwane@arm.linux.org.uk] 
>Sent: Friday, April 29, 2005 6:13 PM
>To: Pallipadi, Venkatesh
>Cc: Andrew Morton; Linus Torvalds; mingo@elte.hu; 
>linux-kernel; Shah, Rajesh; John Stultz; Andi Kleen; Mallick, Asit K
>Subject: Re: [RFC][PATCH] i386 x86-64 Eliminate Local APIC 
>timer interrupt
>
>On Fri, 29 Apr 2005, Venkatesh Pallipadi wrote:
>
>> Proposed Fix: 
>> Attached is a prototype patch, that tries to eliminate the 
>dependency on 
>> local APIC timer for update_process_times(). The patch gets 
>rid of Local APIC 
>> timer altogether. We use the timer interrupt (IRQ 0) configured in 
>> broadcast mode in IOAPIC instead (Doesn't work with 8259). 
>> As changing anything related to basic timer interrupt is a 
>little bit risky, 
>> I have a boot parameter currently ("useapictimer") to switch 
>back to original 
>> local APIC timer way of doing things.
>
>I'm rather reluctant to advocate the broadcast scheme as i can see it 
>breaking on a lot of systems, e.g. SMP systems which use i8259 (as you 
>noted), IBM x440 and ES7000. If anything the default mode 
>should be APIC 
>timer and have a parameter to disable it.

The patch as it is should handle 8259 case using the regular APIC timer.
It only adds broadcast when IOAPIC is used for timer interrupt.

And if broadcast doesn't work on IBM x440 and ES7000, it can easily 
be handled by sub-arch, to use local APIC.

> Regarding things like variable 
>timer ticks, reprogramming the PIT is slow, and using it 
>extensively for 
>such sounds like a step in the wrong direction. 

Variable tick should come into the picture only when the system is totally idle
(for a long time). The algorithm that changes ticks should handle the 
trade-off between frequent HZ interrupts when the system is idle and the overhead
of reprogramming the PIT/HPET. And variable HZ is already changing the PIT, if I 
remember correctly. This patch doesn't add any complexity there.

> Is this feature/bug going to proliferate amongst newer processor 
> local APICs?

This APIC timer stopping in C3 will affect all CPUs that have C3 or 
deeper state. 

Although I agree that changing things like the timer interrupt is like 
walking on a landmine, given all the different kinds of hardware present, 
in general this seems to simplify things related to timer interrupts.

Thanks,
Venki

^ permalink raw reply	[flat|nested] 22+ messages in thread
* RE: [RFC][PATCH] i386 x86-64 Eliminate Local APIC timer interrupt
@ 2005-04-30  2:55 Pallipadi, Venkatesh
  2005-04-30  3:06 ` Andrew Morton
  0 siblings, 1 reply; 22+ messages in thread
From: Pallipadi, Venkatesh @ 2005-04-30  2:55 UTC (permalink / raw)
  To: Andrew Morton
  Cc: torvalds, mingo, linux-kernel, Shah, Rajesh, johnstul, ak,
	Mallick, Asit K

 

>-----Original Message-----
>From: Andrew Morton [mailto:akpm@osdl.org] 
>Sent: Friday, April 29, 2005 7:33 PM
>To: Pallipadi, Venkatesh
>Cc: torvalds@osdl.org; mingo@elte.hu; 
>linux-kernel@vger.kernel.org; Shah, Rajesh; 
>johnstul@us.ibm.com; ak@suse.de; Mallick, Asit K
>Subject: Re: [RFC][PATCH] i386 x86-64 Eliminate Local APIC 
>timer interrupt
>
>The patch (at least, as I merged it) goes into a ghastly death 
>spiral early
>in boot.
>
>Serial console says:
>
>
>Initializing CPU#1
>Calibrating delay using timer specific routine.. 5615.95 
>BogoMIPS (lpj=2807978)
>CPU: Trace cache: 12K uops, L1 D cache: 8K
>CPU: L2 cache: 512K
>CPU: Physical Processor ID: 0
>CPU1: Intel Pentium 4 (Northwood) stepping 07
>Total of 2 processors activated (11238.26 BogoMIPS).
>ENABLING IO-APIC IRQs
>..TIMER: vector=0x31 pin1=2 pin2=-1
>checking TSC synchronization across 2 CPUs: passed.
>Brought up 2 CPUs
>Unable to handle kernel NULL pointer dereference
>
>which isn't very helpful.
>
>tty output is at 
>http://www.zip.com.au/~akpm/linux/patches/stuff/dsc02506.jpg
>
>which is also less that totally useful.
>
>There's waaaaaaaaay too much low-level x86 stuff happening at 
>present.  We
>need to settle it down, go more slowly, take more care and test things
>better, please.  Next -mm has already been delayed by two days 
>due to my
>having to chase down all the bugs people have been sending me.
>

I did test this patch on a variety of systems and didn't see any failures.

Probably some other change in mm is conflicting with this patch? 
Is there a way to get all the patches in mm, so that I can try the same
kernel and reproduce this failure?

I agree though that this patch is very risky and needs some discussion 
and a lot of testing before it goes into base.

Thanks,
Venki

^ permalink raw reply	[flat|nested] 22+ messages in thread
* RE: [RFC][PATCH] i386 x86-64 Eliminate Local APIC timer interrupt
@ 2005-04-30 19:40 Protasevich, Natalie
  0 siblings, 0 replies; 22+ messages in thread
From: Protasevich, Natalie @ 2005-04-30 19:40 UTC (permalink / raw)
  To: Zwane Mwaikambo, Venkatesh Pallipadi
  Cc: Andrew Morton, Linus Torvalds, mingo, linux-kernel, Rajesh Shah,
	John Stultz, Andi Kleen, Asit K Mallick

> On Fri, 29 Apr 2005, Venkatesh Pallipadi wrote:
> 
> > Proposed Fix: 
> > Attached is a prototype patch, that tries to eliminate the 
> dependency 
> > on local APIC timer for update_process_times(). The patch 
> gets rid of 
> > Local APIC timer altogether. We use the timer interrupt (IRQ 0) 
> > configured in broadcast mode in IOAPIC instead (Doesn't 
> work with 8259).
> > As changing anything related to basic timer interrupt is a 
> little bit 
> > risky, I have a boot parameter currently ("useapictimer") to switch 
> > back to original local APIC timer way of doing things.
> 
> I'm rather reluctant to advocate the broadcast scheme as i 
> can see it breaking on a lot of systems, e.g. SMP systems 
> which use i8259 (as you noted), IBM x440 and ES7000. If 
> anything the default mode should be APIC timer and have a 
> parameter to disable it. Regarding things like variable timer 
> ticks, reprogramming the PIT is slow, and using it 
> extensively for such sounds like a step in the wrong 
> direction. Is this feature/bug going to proliferate amongst 
> newer processor local APICs?
> 
> Thanks,
> 	Zwane

I did preliminary testing of the patch applied to the rc3 on the IA-32
ES7000, and it booted fine, without the useapictimer boot option. As
Zwane pointed out correctly, ES7000 doesn't handle IRQ broadcast. The
kernel by-passed the non-apic timer option (chose pin1 in check_timer)
and came up safely with local APIC timer.
Thanks,
--Natalie

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2005-05-11 18:13 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-30  0:26 [RFC][PATCH] i386 x86-64 Eliminate Local APIC timer interrupt Venkatesh Pallipadi
2005-04-30  0:46 ` Zwane Mwaikambo
2005-04-30  0:58   ` Zwane Mwaikambo
2005-04-30  1:13 ` Zwane Mwaikambo
2005-04-30  2:32 ` Andrew Morton
2005-05-02 16:38 ` Andi Kleen
2005-05-02 17:16   ` Venkatesh Pallipadi
2005-05-02 19:08     ` Andi Kleen
2005-05-02 20:27       ` Venkatesh Pallipadi
2005-05-03 14:17         ` Andi Kleen
2005-05-05  5:33       ` Len Brown
2005-05-05 12:19         ` Andi Kleen
2005-05-11 18:12           ` Tony Lindgren
2005-05-05 20:45 ` George Anzinger
  -- strict thread matches above, loose matches on Subject: below --
2005-04-30  2:43 Pallipadi, Venkatesh
2005-05-05  4:16 ` Len Brown
2005-05-05 12:03   ` Andi Kleen
2005-05-05 12:32   ` Maciej W. Rozycki
2005-04-30  2:55 Pallipadi, Venkatesh
2005-04-30  3:06 ` Andrew Morton
2005-05-02 21:19   ` Pavel Machek
2005-04-30 19:40 Protasevich, Natalie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox