* [PATCH 1/4] Zero based percpu: Infrastructure to rebase the per cpu area to zero
2008-06-04 0:30 [PATCH 0/4] percpu: Optimize percpu accesses Mike Travis
@ 2008-06-04 0:30 ` Mike Travis
2008-06-10 10:06 ` Ingo Molnar
2008-06-04 0:30 ` [PATCH 2/4] x86: Extend percpu ops to 64 bit Mike Travis
` (3 subsequent siblings)
4 siblings, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-04 0:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
[-- Attachment #1: zero_based_infrastructure --]
[-- Type: text/plain, Size: 7044 bytes --]
* Support an option
CONFIG_HAVE_ZERO_BASED_PER_CPU
that makes the offsets of per cpu variables start at zero.
If a percpu area starts at zero then:
- We do not need RELOC_HIDE anymore
- Provides for the future capability of architectures providing
a per cpu allocator that returns offsets instead of pointers.
The offsets would be independent of the processor so that
address calculations can be done in a processor independent way.
Per cpu instructions can then add the processor specific offset
at the last minute, possibly in an atomic instruction.
The data the linker provides is different for zero based percpu segments:
__per_cpu_load -> The address at which the percpu area was loaded
__per_cpu_size -> The length of the per cpu area
* Removes the &__per_cpu_x in lockdep. The __per_cpu_x are already
pointers. There is no need to take the address.
* Updates kernel/module.c to be able to deal with a percpu area that
is loaded at __per_cpu_load but is accessed at __per_cpu_start.
Based on linux-2.6.tip
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
include/asm-generic/percpu.h | 9 ++++++++-
include/asm-generic/sections.h | 10 ++++++++++
include/asm-generic/vmlinux.lds.h | 16 ++++++++++++++++
include/linux/percpu.h | 17 ++++++++++++++++-
kernel/lockdep.c | 4 ++--
kernel/module.c | 7 ++++---
6 files changed, 56 insertions(+), 7 deletions(-)
--- linux-2.6.tip.orig/include/asm-generic/percpu.h
+++ linux-2.6.tip/include/asm-generic/percpu.h
@@ -45,7 +45,12 @@ extern unsigned long __per_cpu_offset[NR
* Only S390 provides its own means of moving the pointer.
*/
#ifndef SHIFT_PERCPU_PTR
-#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset))
+# ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU
+# define SHIFT_PERCPU_PTR(__p, __offset) \
+ ((__typeof(__p))(((void *)(__p)) + (__offset)))
+# else
+# define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset))
+# endif /* CONFIG_HAVE_ZERO_BASED_PER_CPU */
#endif
/*
@@ -70,6 +75,8 @@ extern void setup_per_cpu_areas(void);
#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
#define __get_cpu_var(var) per_cpu_var(var)
#define __raw_get_cpu_var(var) per_cpu_var(var)
+#define SHIFT_PERCPU_PTR(__p, __offset) (__p)
+#define per_cpu_offset(x) 0L
#endif /* SMP */
--- linux-2.6.tip.orig/include/asm-generic/sections.h
+++ linux-2.6.tip/include/asm-generic/sections.h
@@ -9,7 +9,17 @@ extern char __bss_start[], __bss_stop[];
extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char _end[];
+#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU
+extern char __per_cpu_load[];
+extern char ____per_cpu_size[];
+#define __per_cpu_size ((unsigned long)&____per_cpu_size)
+#define __per_cpu_start ((char *)0)
+#define __per_cpu_end ((char *)__per_cpu_size)
+#else
extern char __per_cpu_start[], __per_cpu_end[];
+#define __per_cpu_load __per_cpu_start
+#define __per_cpu_size (__per_cpu_end - __per_cpu_start)
+#endif
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __start_rodata[], __end_rodata[];
--- linux-2.6.tip.orig/include/asm-generic/vmlinux.lds.h
+++ linux-2.6.tip/include/asm-generic/vmlinux.lds.h
@@ -371,6 +371,21 @@
*(.initcall7.init) \
*(.initcall7s.init)
+#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU
+#define PERCPU(align) \
+ . = ALIGN(align); \
+ percpu : { } :percpu \
+ __per_cpu_load = .; \
+ .data.percpu 0 : AT(__per_cpu_load - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
+ *(.data.percpu.shared_aligned) \
+ *(.data.percpu) \
+ *(.data.percpu.page_aligned) \
+ ____per_cpu_size = .; \
+ } \
+ . = __per_cpu_load + ____per_cpu_size; \
+ data : { } :data
+#else
#define PERCPU(align) \
. = ALIGN(align); \
__per_cpu_start = .; \
@@ -380,3 +395,4 @@
*(.data.percpu.shared_aligned) \
} \
__per_cpu_end = .;
+#endif
--- linux-2.6.tip.orig/include/linux/percpu.h
+++ linux-2.6.tip/include/linux/percpu.h
@@ -27,7 +27,18 @@
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
__attribute__((__section__(".data.percpu.page_aligned"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ __attribute__((__section__(".data.percpu.first"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ DEFINE_PER_CPU(type, name)
+#endif
+
+#else /* !CONFIG_SMP */
+
#define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
@@ -36,7 +47,11 @@
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU(type, name)
-#endif
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ DEFINE_PER_CPU(type, name)
+
+#endif /* !CONFIG_SMP */
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
--- linux-2.6.tip.orig/kernel/lockdep.c
+++ linux-2.6.tip/kernel/lockdep.c
@@ -614,8 +614,8 @@ static int static_obj(void *obj)
* percpu var?
*/
for_each_possible_cpu(i) {
- start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
- end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
+ start = (unsigned long) __per_cpu_start + per_cpu_offset(i);
+ end = (unsigned long) __per_cpu_start + PERCPU_ENOUGH_ROOM
+ per_cpu_offset(i);
if ((addr >= start) && (addr < end))
--- linux-2.6.tip.orig/kernel/module.c
+++ linux-2.6.tip/kernel/module.c
@@ -45,6 +45,7 @@
#include <linux/unwind.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/sections.h>
#include <linux/license.h>
#include <asm/sections.h>
#include <linux/marker.h>
@@ -367,7 +368,7 @@ static void *percpu_modalloc(unsigned lo
align = PAGE_SIZE;
}
- ptr = __per_cpu_start;
+ ptr = __per_cpu_load;
for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
/* Extra for alignment requirement. */
extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
@@ -402,7 +403,7 @@ static void *percpu_modalloc(unsigned lo
static void percpu_modfree(void *freeme)
{
unsigned int i;
- void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
+ void *ptr = __per_cpu_load + block_size(pcpu_size[0]);
/* First entry is core kernel percpu data. */
for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -453,7 +454,7 @@ static int percpu_modinit(void)
pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
GFP_KERNEL);
/* Static in-kernel percpu data (used). */
- pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
+ pcpu_size[0] = -__per_cpu_size;
/* Free room. */
pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
if (pcpu_size[1] < 0) {
--
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [PATCH 1/4] Zero based percpu: Infrastructure to rebase the per cpu area to zero
2008-06-04 0:30 ` [PATCH 1/4] Zero based percpu: Infrastructure to rebase the per cpu area to zero Mike Travis
@ 2008-06-10 10:06 ` Ingo Molnar
0 siblings, 0 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-06-10 10:06 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
* Mike Travis <travis@sgi.com> wrote:
> * Support an option
>
> CONFIG_HAVE_ZERO_BASED_PER_CPU
>
> to make offsets for per cpu variables to start at zero.
>
> If a percpu area starts at zero then:
>
> - We do not need RELOC_HIDE anymore
>
> - Provides for the future capability of architectures providing
> a per cpu allocator that returns offsets instead of pointers.
> The offsets would be independent of the processor so that
> address calculations can be done in a processor independent way.
> Per cpu instructions can then add the processor specific offset
> at the last minute possibly in an atomic instruction.
>
> The data the linker provides is different for zero based percpu segments:
>
> __per_cpu_load -> The address at which the percpu area was loaded
> __per_cpu_size -> The length of the per cpu area
>
> * Removes the &__per_cpu_x in lockdep. The __per_cpu_x are already
> pointers. There is no need to take the address.
>
> * Updates kernel/module.c to be able to deal with a percpu area that
> is loaded at __per_cpu_load but is accessed at __per_cpu_start.
>
> Based on linux-2.6.tip
applied to tip/core/percpu, thanks.
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH 2/4] x86: Extend percpu ops to 64 bit
2008-06-04 0:30 [PATCH 0/4] percpu: Optimize percpu accesses Mike Travis
2008-06-04 0:30 ` [PATCH 1/4] Zero based percpu: Infrastructure to rebase the per cpu area to zero Mike Travis
@ 2008-06-04 0:30 ` Mike Travis
2008-06-10 10:04 ` Ingo Molnar
2008-06-04 0:30 ` [PATCH 3/4] x86_64: Fold pda into per cpu area Mike Travis
` (2 subsequent siblings)
4 siblings, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-04 0:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
[-- Attachment #1: zero_based_percpu_64bit --]
[-- Type: text/plain, Size: 3858 bytes --]
* x86 percpu ops now will work on 64 bit too, so add the missing 8 byte cases.
* Add a few atomic ops that will be useful in the future:
x86_xchg_percpu()
x86_cmpxchg_percpu().
x86_inc_percpu()
x86_dec_percpu() - Incrementing or decrementing by one can generate
more efficient instructions, and inc/dec will be supported by
cpu ops later.
* Use per_cpu_var() instead of per_cpu__##xxx.
Based on linux-2.6.tip
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
include/asm-x86/percpu.h | 83 ++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 78 insertions(+), 5 deletions(-)
--- linux-2.6.tip.orig/include/asm-x86/percpu.h
+++ linux-2.6.tip/include/asm-x86/percpu.h
@@ -108,6 +108,11 @@ do { \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
+ case 8: \
+ asm(op "q %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ : "ri" ((T__)val)); \
+ break; \
default: __bad_percpu_size(); \
} \
} while (0)
@@ -131,16 +136,84 @@ do { \
: "=r" (ret__) \
: "m" (var)); \
break; \
+ case 8: \
+ asm(op "q "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
default: __bad_percpu_size(); \
} \
ret__; \
})
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_addr_op(op, var) \
+({ \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ case 2: \
+ asm(op "w "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ case 4: \
+ asm(op "l "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ case 8: \
+ asm(op "q "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+})
+
+#define percpu_cmpxchg_op(var, old, new) \
+({ \
+ typeof(var) prev; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("cmpxchgb %b1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "q"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm("cmpxchgw %w1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "r"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm("cmpxchgl %k1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "r"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm("cmpxchgq %1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "r"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ default: \
+ __bad_percpu_size(); \
+ } \
+ return prev; \
+})
+
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu_var(var))
+#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu_var(var), val)
+#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu_var(var), val)
+#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu_var(var), val)
+#define x86_inc_percpu(var) percpu_addr_op("inc", per_cpu_var(var))
+#define x86_dec_percpu(var) percpu_addr_op("dec", per_cpu_var(var))
+#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu_var(var), val)
+#define x86_xchg_percpu(var, val) percpu_to_op("xchg", per_cpu_var(var), val)
+#define x86_cmpxchg_percpu(var, old, new) \
+ percpu_cmpxchg_op(per_cpu_var(var), old, new)
+
#endif /* !__ASSEMBLY__ */
#endif /* !CONFIG_X86_64 */
--
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [PATCH 2/4] x86: Extend percpu ops to 64 bit
2008-06-04 0:30 ` [PATCH 2/4] x86: Extend percpu ops to 64 bit Mike Travis
@ 2008-06-10 10:04 ` Ingo Molnar
0 siblings, 0 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-06-10 10:04 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
* Mike Travis <travis@sgi.com> wrote:
> * x86 percpu ops now will work on 64 bit too, so add the missing 8 byte cases.
>
> * Add a few atomic ops that will be useful in the future:
>
> x86_xchg_percpu()
> x86_cmpxchg_percpu().
>
> x86_inc_percpu() - Increment by one can generate more efficient
> x86_dec_percpu() instructions and inc/dec will be supported by
> cpu ops later.
>
> * Use per_cpu_var() instead of per_cpu__##xxx.
>
> Based on linux-2.6.tip
applied to tip/cpus4096, thanks Mike.
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 0:30 [PATCH 0/4] percpu: Optimize percpu accesses Mike Travis
2008-06-04 0:30 ` [PATCH 1/4] Zero based percpu: Infrastructure to rebase the per cpu area to zero Mike Travis
2008-06-04 0:30 ` [PATCH 2/4] x86: Extend percpu ops to 64 bit Mike Travis
@ 2008-06-04 0:30 ` Mike Travis
2008-06-04 12:59 ` Jeremy Fitzhardinge
2008-06-05 10:22 ` [crash, bisected] " Ingo Molnar
2008-06-04 0:30 ` [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu() Mike Travis
2008-06-04 10:18 ` [PATCH] x86: collapse the various size-dependent percpu accessors together Jeremy Fitzhardinge
4 siblings, 2 replies; 119+ messages in thread
From: Mike Travis @ 2008-06-04 0:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
[-- Attachment #1: zero_based_fold --]
[-- Type: text/plain, Size: 16555 bytes --]
* Declare the pda as a per cpu variable.
* Make the x86_64 per cpu area start at zero.
* Since the pda is now the first element of the per_cpu area, cpu_pda()
is no longer needed and per_cpu() can be used instead. This also makes
the _cpu_pda[] table obsolete.
* Since %gs points to the pda, it also points to the per cpu
variables, which can then be accessed thusly:
%gs:[&per_cpu_xxxx - __per_cpu_start]
Based on linux-2.6.tip
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/Kconfig | 3 +
arch/x86/kernel/head64.c | 34 ++++++--------
arch/x86/kernel/irq_64.c | 36 ++++++++-------
arch/x86/kernel/setup.c | 90 ++++++++++++---------------------------
arch/x86/kernel/setup64.c | 5 --
arch/x86/kernel/smpboot.c | 51 ----------------------
arch/x86/kernel/traps_64.c | 11 +++-
arch/x86/kernel/vmlinux_64.lds.S | 1
include/asm-x86/percpu.h | 48 ++++++--------------
9 files changed, 89 insertions(+), 190 deletions(-)
--- linux-2.6.tip.orig/arch/x86/Kconfig
+++ linux-2.6.tip/arch/x86/Kconfig
@@ -129,6 +129,9 @@ config HAVE_SETUP_PER_CPU_AREA
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
+config HAVE_ZERO_BASED_PER_CPU
+ def_bool X86_64_SMP
+
config ARCH_HIBERNATION_POSSIBLE
def_bool y
depends on !SMP || !X86_VOYAGER
--- linux-2.6.tip.orig/arch/x86/kernel/head64.c
+++ linux-2.6.tip/arch/x86/kernel/head64.c
@@ -25,20 +25,6 @@
#include <asm/e820.h>
#include <asm/bios_ebda.h>
-/* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
-
-#ifdef CONFIG_SMP
-/*
- * We install an empty cpu_pda pointer table to indicate to early users
- * (numa_set_node) that the cpu_pda pointer table for cpus other than
- * the boot cpu is not yet setup.
- */
-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
-#else
-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
-#endif
-
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -159,6 +145,20 @@ void __init x86_64_start_kernel(char * r
/* Cleanup the over mapped high alias */
cleanup_highmap();
+ /* point to boot pda which is the first element in the percpu area */
+ {
+ struct x8664_pda *pda;
+#ifdef CONFIG_SMP
+ pda = (struct x8664_pda *)__per_cpu_load;
+ pda->data_offset = per_cpu_offset(0) = (unsigned long)pda;
+#else
+ pda = &per_cpu(pda, 0);
+ pda->data_offset = (unsigned long)pda;
+#endif
+ }
+ /* initialize boot cpu_pda data */
+ pda_init(0);
+
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
@@ -170,12 +170,6 @@ void __init x86_64_start_kernel(char * r
early_printk("Kernel alive\n");
- _cpu_pda = __cpu_pda;
- cpu_pda(0) = &_boot_cpu_pda;
- pda_init(0);
-
- early_printk("Kernel really alive\n");
-
copy_bootdata(__va(real_mode_data));
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
--- linux-2.6.tip.orig/arch/x86/kernel/irq_64.c
+++ linux-2.6.tip/arch/x86/kernel/irq_64.c
@@ -115,39 +115,43 @@ skip:
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
+ seq_printf(p, "%10u ", per_cpu(pda.__nmi_count, j));
seq_printf(p, " Non-maskable interrupts\n");
seq_printf(p, "LOC: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
+ seq_printf(p, "%10u ", per_cpu(pda.apic_timer_irqs, j));
seq_printf(p, " Local timer interrupts\n");
#ifdef CONFIG_SMP
seq_printf(p, "RES: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
+ seq_printf(p, "%10u ",
+ per_cpu(pda.irq_resched_count, j));
seq_printf(p, " Rescheduling interrupts\n");
seq_printf(p, "CAL: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
+ seq_printf(p, "%10u ", per_cpu(pda.irq_call_count, j));
seq_printf(p, " function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
+ seq_printf(p, "%10u ", per_cpu(pda.irq_tlb_count, j));
seq_printf(p, " TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "TRM: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
+ seq_printf(p, "%10u ",
+ per_cpu(pda.irq_thermal_count, j));
seq_printf(p, " Thermal event interrupts\n");
seq_printf(p, "THR: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
+ seq_printf(p, "%10u ",
+ per_cpu(pda.irq_threshold_count, j));
seq_printf(p, " Threshold APIC interrupts\n");
#endif
seq_printf(p, "SPU: ");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
+ seq_printf(p, "%10u ",
+ per_cpu(pda.irq_spurious_count, j));
seq_printf(p, " Spurious interrupts\n");
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
}
@@ -159,19 +163,19 @@ skip:
*/
u64 arch_irq_stat_cpu(unsigned int cpu)
{
- u64 sum = cpu_pda(cpu)->__nmi_count;
+ u64 sum = per_cpu(pda.__nmi_count, cpu);
- sum += cpu_pda(cpu)->apic_timer_irqs;
+ sum += per_cpu(pda.apic_timer_irqs, cpu);
#ifdef CONFIG_SMP
- sum += cpu_pda(cpu)->irq_resched_count;
- sum += cpu_pda(cpu)->irq_call_count;
- sum += cpu_pda(cpu)->irq_tlb_count;
+ sum += per_cpu(pda.irq_resched_count, cpu);
+ sum += per_cpu(pda.irq_call_count, cpu);
+ sum += per_cpu(pda.irq_tlb_count, cpu);
#endif
#ifdef CONFIG_X86_MCE
- sum += cpu_pda(cpu)->irq_thermal_count;
- sum += cpu_pda(cpu)->irq_threshold_count;
+ sum += per_cpu(pda.irq_thermal_count, cpu);
+ sum += per_cpu(pda.irq_threshold_count, cpu);
#endif
- sum += cpu_pda(cpu)->irq_spurious_count;
+ sum += per_cpu(pda.irq_spurious_count, cpu);
return sum;
}
--- linux-2.6.tip.orig/arch/x86/kernel/setup.c
+++ linux-2.6.tip/arch/x86/kernel/setup.c
@@ -29,6 +29,11 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_a
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+#ifdef CONFIG_X86_64
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
+#endif
+
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1
@@ -47,7 +52,7 @@ static void __init setup_node_to_cpumask
static inline void setup_node_to_cpumask_map(void) { }
#endif
-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP)
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
* Copy data used in early init routines from the initial arrays to the
* per cpu data areas. These arrays then become expendable and the
@@ -94,64 +99,9 @@ static void __init setup_cpumask_of_cpu(
static inline void setup_cpumask_of_cpu(void) { }
#endif
-#ifdef CONFIG_X86_32
-/*
- * Great future not-so-futuristic plan: make i386 and x86_64 do it
- * the same way
- */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
-static inline void setup_cpu_pda_map(void) { }
-
-#elif !defined(CONFIG_SMP)
-static inline void setup_cpu_pda_map(void) { }
-
-#else /* CONFIG_SMP && CONFIG_X86_64 */
-
-/*
- * Allocate cpu_pda pointer table and array via alloc_bootmem.
- */
-static void __init setup_cpu_pda_map(void)
-{
- char *pda;
- struct x8664_pda **new_cpu_pda;
- unsigned long size;
- int cpu;
-
- size = roundup(sizeof(struct x8664_pda), cache_line_size());
-
- /* allocate cpu_pda array and pointer table */
- {
- unsigned long tsize = nr_cpu_ids * sizeof(void *);
- unsigned long asize = size * (nr_cpu_ids - 1);
-
- tsize = roundup(tsize, cache_line_size());
- new_cpu_pda = alloc_bootmem(tsize + asize);
- pda = (char *)new_cpu_pda + tsize;
- }
- /* initialize pointer table to static pda's */
- for_each_possible_cpu(cpu) {
- if (cpu == 0) {
- /* leave boot cpu pda in place */
- new_cpu_pda[0] = cpu_pda(0);
- continue;
- }
- new_cpu_pda[cpu] = (struct x8664_pda *)pda;
- new_cpu_pda[cpu]->in_bootmem = 1;
- pda += size;
- }
-
- /* point to new pointer table */
- _cpu_pda = new_cpu_pda;
-}
-#endif
-
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
void __init setup_per_cpu_areas(void)
{
ssize_t size = PERCPU_ENOUGH_ROOM;
@@ -164,9 +114,6 @@ void __init setup_per_cpu_areas(void)
nr_cpu_ids = num_processors;
#endif
- /* Setup cpu_pda map */
- setup_cpu_pda_map();
-
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
@@ -186,9 +133,28 @@ void __init setup_per_cpu_areas(void)
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
+ /* Initialize each cpu's per_cpu area and save pointer */
+ memcpy(ptr, __per_cpu_load, __per_cpu_size);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+#ifdef CONFIG_X86_64
+ /*
+ * Note the boot cpu has been using the static per_cpu load
+ * area for its pda. We need to zero out the pdas for the
+ * other cpus that are coming online.
+ */
+ {
+ /* we rely on the fact that pda is the first element */
+ struct x8664_pda *pda = (struct x8664_pda *)ptr;
+
+ if (cpu)
+ memset(pda, 0, sizeof(struct x8664_pda));
+ else
+ pda_init(0);
+
+ pda->data_offset = (unsigned long)ptr;
+ }
+#endif
}
printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
@@ -240,8 +206,8 @@ void __cpuinit numa_set_node(int cpu, in
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
- if (cpu_pda(cpu) && node != NUMA_NO_NODE)
- cpu_pda(cpu)->nodenumber = node;
+ if (per_cpu_offset(cpu))
+ per_cpu(pda.nodenumber, cpu) = node;
if (cpu_to_node_map)
cpu_to_node_map[cpu] = node;
--- linux-2.6.tip.orig/arch/x86/kernel/setup64.c
+++ linux-2.6.tip/arch/x86/kernel/setup64.c
@@ -35,9 +35,6 @@ struct boot_params boot_params;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
@@ -89,7 +86,7 @@ __setup("noexec32=", nonx32_setup);
void pda_init(int cpu)
{
- struct x8664_pda *pda = cpu_pda(cpu);
+ struct x8664_pda *pda = &per_cpu(pda, cpu);
/* Setup up data that may be needed in __get_free_pages early */
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
--- linux-2.6.tip.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6.tip/arch/x86/kernel/smpboot.c
@@ -798,45 +798,6 @@ static void __cpuinit do_fork_idle(struc
complete(&c_idle->done);
}
-#ifdef CONFIG_X86_64
-/*
- * Allocate node local memory for the AP pda.
- *
- * Must be called after the _cpu_pda pointer table is initialized.
- */
-static int __cpuinit get_local_pda(int cpu)
-{
- struct x8664_pda *oldpda, *newpda;
- unsigned long size = sizeof(struct x8664_pda);
- int node = cpu_to_node(cpu);
-
- if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
- return 0;
-
- oldpda = cpu_pda(cpu);
- newpda = kmalloc_node(size, GFP_ATOMIC, node);
- if (!newpda) {
- printk(KERN_ERR "Could not allocate node local PDA "
- "for CPU %d on node %d\n", cpu, node);
-
- if (oldpda)
- return 0; /* have a usable pda */
- else
- return -1;
- }
-
- if (oldpda) {
- memcpy(newpda, oldpda, size);
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, size);
- }
-
- newpda->in_bootmem = 0;
- cpu_pda(cpu) = newpda;
- return 0;
-}
-#endif /* CONFIG_X86_64 */
-
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -860,14 +821,6 @@ static int __cpuinit do_boot_cpu(int api
printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
return -1;
}
-
- /* Allocate node local memory for AP pdas */
- if (cpu > 0) {
- boot_error = get_local_pda(cpu);
- if (boot_error)
- goto restore_state;
- /* if can't get pda memory, can't start cpu */
- }
#endif
alternatives_smp_switch(1);
@@ -908,7 +861,7 @@ do_rest:
stack_start.sp = (void *) c_idle.idle->thread.sp;
irq_ctx_init(cpu);
#else
- cpu_pda(cpu)->pcurrent = c_idle.idle;
+ per_cpu(pda.pcurrent, cpu) = c_idle.idle;
init_rsp = c_idle.idle->thread.sp;
load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
initial_code = (unsigned long)start_secondary;
@@ -985,8 +938,6 @@ do_rest:
}
}
-restore_state:
-
if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
--- linux-2.6.tip.orig/arch/x86/kernel/traps_64.c
+++ linux-2.6.tip/arch/x86/kernel/traps_64.c
@@ -265,7 +265,8 @@ void dump_trace(struct task_struct *tsk,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+ unsigned long *irqstack_end =
+ (unsigned long*)per_cpu(pda.irqstackptr, cpu);
unsigned used = 0;
struct thread_info *tinfo;
@@ -399,8 +400,10 @@ _show_stack(struct task_struct *tsk, str
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+ unsigned long *irqstack_end =
+ (unsigned long *)per_cpu(pda.irqstackptr, cpu);
+ unsigned long *irqstack =
+ (unsigned long *)(per_cpu(pda.irqstackptr, cpu) - IRQSTACKSIZE);
// debugging aid: "show_stack(NULL, NULL);" prints the
// back trace for this cpu.
@@ -464,7 +467,7 @@ void show_registers(struct pt_regs *regs
int i;
unsigned long sp;
const int cpu = smp_processor_id();
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+ struct task_struct *cur = __get_cpu_var(pda.pcurrent);
u8 *ip;
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
--- linux-2.6.tip.orig/arch/x86/kernel/vmlinux_64.lds.S
+++ linux-2.6.tip/arch/x86/kernel/vmlinux_64.lds.S
@@ -16,6 +16,7 @@ jiffies_64 = jiffies;
_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
+ percpu PT_LOAD FLAGS(4); /* R__ */
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
--- linux-2.6.tip.orig/include/asm-x86/percpu.h
+++ linux-2.6.tip/include/asm-x86/percpu.h
@@ -3,26 +3,20 @@
#ifdef CONFIG_X86_64
#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
- in the PDA. Longer term the PDA and every per cpu variable
- should be just put into a single section and referenced directly
- from %gs */
-
-#ifdef CONFIG_SMP
#include <asm/pda.h>
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
+#ifdef CONFIG_SMP
+#define __my_cpu_offset (x86_read_percpu(pda.data_offset))
+#define __percpu_seg "%%gs:"
+#else
+#define __percpu_seg ""
#endif
+
#include <asm-generic/percpu.h>
DECLARE_PER_CPU(struct x8664_pda, pda);
-#else /* CONFIG_X86_64 */
+#else /* !CONFIG_X86_64 */
#ifdef __ASSEMBLY__
@@ -51,36 +45,23 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
#else /* ...!ASSEMBLY */
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
#ifdef CONFIG_SMP
-
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"
-
-#else /* !SMP */
-
+#else
#define __percpu_seg ""
-
-#endif /* SMP */
+#endif
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
@@ -215,7 +196,6 @@ do { \
percpu_cmpxchg_op(per_cpu_var(var), old, new)
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#ifdef CONFIG_SMP
--
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 0:30 ` [PATCH 3/4] x86_64: Fold pda into per cpu area Mike Travis
@ 2008-06-04 12:59 ` Jeremy Fitzhardinge
2008-06-04 13:48 ` Mike Travis
2008-06-09 23:18 ` Christoph Lameter
2008-06-05 10:22 ` [crash, bisected] " Ingo Molnar
1 sibling, 2 replies; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-04 12:59 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, Rusty Russell
Mike Travis wrote:
> * Declare the pda as a per cpu variable.
>
> * Make the x86_64 per cpu area start at zero.
>
> * Since the pda is now the first element of the per_cpu area, cpu_pda()
> is no longer needed and per_cpu() can be used instead. This also makes
> the _cpu_pda[] table obsolete.
>
> * Since %gs is pointing to the pda, it will then also point to the per cpu
> variables and can be accessed thusly:
>
> %gs:[&per_cpu_xxxx - __per_cpu_start]
>
Unfortunately that doesn't actually work, because you can't have a reloc
with two variables.
In something like:
mov %gs:per_cpu__foo - 12345, %rax
mov %gs:per_cpu__foo, %rax
mov %gs:per_cpu__foo - 12345(%rip), %rax
mov %gs:per_cpu__foo(%rip), %rax
mov %gs:per_cpu__foo - __per_cpu_start, %rax
mov %gs:per_cpu__foo - __per_cpu_start(%rip), %rax
the last two lines will not assemble:
t.S:5: Error: can't resolve `per_cpu__foo' {*UND* section} - `__per_cpu_start' {*UND* section}
t.S:6: Error: can't resolve `per_cpu__foo' {*UND* section} - `__per_cpu_start' {*UND* section}
Unfortunately, the only way I can think of fixing this is to compute the
offset into a temp register, then use that:
lea per_cpu__foo(%rip), %rax
mov %gs:__per_cpu_offset(%rax), %rax
(where __per_cpu_offset is defined in the linker script as
-__per_cpu_start).
This seems to be a general problem with zero-offset per-cpu. And it's
unfortunate, because no-register access to per-cpu variables is nice to
have.
The other alternative - and I have no idea whether this is practical or
possible - is to define a complete set of pre-offset per_cpu symbols.
J
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 12:59 ` Jeremy Fitzhardinge
@ 2008-06-04 13:48 ` Mike Travis
2008-06-04 13:58 ` Jeremy Fitzhardinge
2008-06-09 23:18 ` Christoph Lameter
1 sibling, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-04 13:48 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, Rusty Russell
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> * Declare the pda as a per cpu variable.
>>
>> * Make the x86_64 per cpu area start at zero.
>>
>> * Since the pda is now the first element of the per_cpu area, cpu_pda()
>> is no longer needed and per_cpu() can be used instead. This also
>> makes
>> the _cpu_pda[] table obsolete.
>>
>> * Since %gs is pointing to the pda, it will then also point to the
>> per cpu
>> variables and can be accessed thusly:
>>
>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>
The above is only a partial story (I folded the two patches but didn't
update the comments correctly.) The variables are already offset from
__per_cpu_start by virtue of the .data.percpu section being based at
zero. Therefore only the %gs register needs to be set to the base of
each cpu's percpu section to resolve the target address:
%gs:&per_cpu_xxxx
And the .data.percpu.first forces the pda percpu variable to the front.
>
> Unfortunately that doesn't actually work, because you can't have a reloc
> with two variables.
>
> In something like:
>
> mov %gs:per_cpu__foo - 12345, %rax
> mov %gs:per_cpu__foo, %rax
> mov %gs:per_cpu__foo - 12345(%rip), %rax
> mov %gs:per_cpu__foo(%rip), %rax
> mov %gs:per_cpu__foo - __per_cpu_start, %rax
> mov %gs:per_cpu__foo - __per_cpu_start(%rip), %rax
>
> the last two lines will not assemble:
>
> t.S:5: Error: can't resolve `per_cpu__foo' {*UND* section} -
> `__per_cpu_start' {*UND* section}
> t.S:6: Error: can't resolve `per_cpu__foo' {*UND* section} -
> `__per_cpu_start' {*UND* section}
>
> Unfortunately, the only way I can think of fixing this is to compute the
> offset into a temp register, then use that:
>
> lea per_cpu__foo(%rip), %rax
> mov %gs:__per_cpu_offset(%rax), %rax
>
> (where __per_cpu_offset is defined in the linker script as
> -__per_cpu_start).
>
> This seems to be a general problem with zero-offset per-cpu. And its
> unfortunate, because no-register access to per-cpu variables is nice to
> have.
>
> The other alternative - and I have no idea whether this is practical or
> possible - is to define a complete set of pre-offset per_cpu symbols.
>
> J
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 13:48 ` Mike Travis
@ 2008-06-04 13:58 ` Jeremy Fitzhardinge
2008-06-04 14:17 ` Mike Travis
0 siblings, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-04 13:58 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, Rusty Russell
Mike Travis wrote:
> Jeremy Fitzhardinge wrote:
>
>> Mike Travis wrote:
>>
>>> * Declare the pda as a per cpu variable.
>>>
>>> * Make the x86_64 per cpu area start at zero.
>>>
>>> * Since the pda is now the first element of the per_cpu area, cpu_pda()
>>> is no longer needed and per_cpu() can be used instead. This also
>>> makes
>>> the _cpu_pda[] table obsolete.
>>>
>>> * Since %gs is pointing to the pda, it will then also point to the
>>> per cpu
>>> variables and can be accessed thusly:
>>>
>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>
>>>
>
>
> The above is only a partial story (I folded the two patches but didn't
> update the comments correctly.] The variables are already offset from
> __per_cpu_start by virtue of the .data.percpu section being based at
> zero. Therefore only the %gs register needs to be set to the base of
> each cpu's percpu section to resolve the target address:
>
> %gs:&per_cpu_xxxx
>
Oh, good. I'd played with trying to make that work at one point, and
got lost in linker bugs and/or random version-specific strangeness.
J
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 13:58 ` Jeremy Fitzhardinge
@ 2008-06-04 14:17 ` Mike Travis
0 siblings, 0 replies; 119+ messages in thread
From: Mike Travis @ 2008-06-04 14:17 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, Rusty Russell
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> Jeremy Fitzhardinge wrote:
>>
>>> Mike Travis wrote:
>>>
>>>> * Declare the pda as a per cpu variable.
>>>>
>>>> * Make the x86_64 per cpu area start at zero.
>>>>
>>>> * Since the pda is now the first element of the per_cpu area,
>>>> cpu_pda()
>>>> is no longer needed and per_cpu() can be used instead. This also
>>>> makes
>>>> the _cpu_pda[] table obsolete.
>>>>
>>>> * Since %gs is pointing to the pda, it will then also point to the
>>>> per cpu
>>>> variables and can be accessed thusly:
>>>>
>>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>>
>>>>
>>
>> The above is only a partial story (I folded the two patches but didn't
>> update the comments correctly.] The variables are already offset from
>> __per_cpu_start by virtue of the .data.percpu section being based at
>> zero. Therefore only the %gs register needs to be set to the base of
>> each cpu's percpu section to resolve the target address:
>>
>> %gs:&per_cpu_xxxx
>>
>
> Oh, good. I'd played with trying to make that work at one point, and
> got lost in linker bugs and/or random version-specific strangeness.
> J
Incidentally, this is why the following load is needed in x86_64_start_kernel():
pda = (struct x8664_pda *)__per_cpu_load;
pda->data_offset = per_cpu_offset(0) = (unsigned long)pda;
/* initialize boot cpu_pda data */
pda_init(0);
pda_init() loads the %gs reg so early accesses to the static per_cpu section
can be executed before the percpu areas are allocated.
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 12:59 ` Jeremy Fitzhardinge
2008-06-04 13:48 ` Mike Travis
@ 2008-06-09 23:18 ` Christoph Lameter
1 sibling, 0 replies; 119+ messages in thread
From: Christoph Lameter @ 2008-06-09 23:18 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Mike Travis, Ingo Molnar, Andrew Morton, David Miller,
Eric Dumazet, linux-kernel, Rusty Russell
On Wed, 4 Jun 2008, Jeremy Fitzhardinge wrote:
> > %gs:[&per_cpu_xxxx - __per_cpu_start]
> >
>
> Unfortunately that doesn't actually work, because you can't have a reloc with
> two variables.
That is just a conceptual discussion. __per_cpu_start is 0 with the zero
based patch. And thus this reduces to
%gs:[&per_cpu_xxx]
^ permalink raw reply [flat|nested] 119+ messages in thread
* [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-04 0:30 ` [PATCH 3/4] x86_64: Fold pda into per cpu area Mike Travis
2008-06-04 12:59 ` Jeremy Fitzhardinge
@ 2008-06-05 10:22 ` Ingo Molnar
2008-06-05 16:02 ` Mike Travis
2008-06-10 21:31 ` Mike Travis
1 sibling, 2 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-06-05 10:22 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel, the arch/x86 maintainers
* Mike Travis <travis@sgi.com> wrote:
> * Declare the pda as a per cpu variable.
>
> * Make the x86_64 per cpu area start at zero.
>
> * Since the pda is now the first element of the per_cpu area, cpu_pda()
> is no longer needed and per_cpu() can be used instead. This also makes
> the _cpu_pda[] table obsolete.
>
> * Since %gs is pointing to the pda, it will then also point to the per cpu
> variables and can be accessed thusly:
>
> %gs:[&per_cpu_xxxx - __per_cpu_start]
>
> Based on linux-2.6.tip
-tip testing found an instantaneous reboot crash on 64-bit x86, with
this config:
http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
there is no boot log as the instantaneous reboot happens before anything
is printed to the (early-) serial console. I have bisected it down to:
| 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f is first bad commit
| commit 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f
| Author: Mike Travis <travis@sgi.com>
| Date: Tue Jun 3 17:30:21 2008 -0700
|
| x86_64: Fold pda into per cpu area
the big problem is not just this crash, but that the patch is _way_ too
big:
arch/x86/Kconfig | 3 +
arch/x86/kernel/head64.c | 34 ++++++--------
arch/x86/kernel/irq_64.c | 36 ++++++++-------
arch/x86/kernel/setup.c | 90 ++++++++++++---------------------------
arch/x86/kernel/setup64.c | 5 --
arch/x86/kernel/smpboot.c | 51 ----------------------
arch/x86/kernel/traps_64.c | 11 +++-
arch/x86/kernel/vmlinux_64.lds.S | 1
include/asm-x86/percpu.h | 48 ++++++--------------
9 files changed, 89 insertions(+), 190 deletions(-)
considering the danger involved, this is just way too large, and there's
no reasonable debugging i can do in the bisection to narrow it down any
further.
Please resubmit with the bug fixed and with a proper splitup, the more
patches you manage to create, the better. For a dangerous code area like
this, with a track record of frequent breakages in the past, i would not
mind a "one line of code changed per patch" splitup either. (Feel free
to send a git tree link for us to try as well.)
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-05 10:22 ` [crash, bisected] " Ingo Molnar
@ 2008-06-05 16:02 ` Mike Travis
2008-06-06 8:29 ` Jeremy Fitzhardinge
2008-06-10 21:31 ` Mike Travis
1 sibling, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-05 16:02 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel, the arch/x86 maintainers
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> * Declare the pda as a per cpu variable.
>>
>> * Make the x86_64 per cpu area start at zero.
>>
>> * Since the pda is now the first element of the per_cpu area, cpu_pda()
>> is no longer needed and per_cpu() can be used instead. This also makes
>> the _cpu_pda[] table obsolete.
>>
>> * Since %gs is pointing to the pda, it will then also point to the per cpu
>> variables and can be accessed thusly:
>>
>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>
>> Based on linux-2.6.tip
>
> -tip testing found an instantaneous reboot crash on 64-bit x86, with
> this config:
>
> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>
> there is no boot log as the instantaneous reboot happens before anything
> is printed to the (early-) serial console. I have bisected it down to:
>
> | 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f is first bad commit
> | commit 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f
> | Author: Mike Travis <travis@sgi.com>
> | Date: Tue Jun 3 17:30:21 2008 -0700
> |
> | x86_64: Fold pda into per cpu area
>
> the big problem is not just this crash, but that the patch is _way_ too
> big:
>
> arch/x86/Kconfig | 3 +
> arch/x86/kernel/head64.c | 34 ++++++--------
> arch/x86/kernel/irq_64.c | 36 ++++++++-------
> arch/x86/kernel/setup.c | 90 ++++++++++++---------------------------
> arch/x86/kernel/setup64.c | 5 --
> arch/x86/kernel/smpboot.c | 51 ----------------------
> arch/x86/kernel/traps_64.c | 11 +++-
> arch/x86/kernel/vmlinux_64.lds.S | 1
> include/asm-x86/percpu.h | 48 ++++++--------------
> 9 files changed, 89 insertions(+), 190 deletions(-)
>
> considering the danger involved, this is just way too large, and there's
> no reasonable debugging i can do in the bisection to narrow it down any
> further.
>
> Please resubmit with the bug fixed and with a proper splitup, the more
> patches you manage to create, the better. For a dangerous code area like
> this, with a track record of frequent breakages in the past, i would not
> mind a "one line of code changed per patch" splitup either. (Feel free
> to send a git tree link for us to try as well.)
>
> Ingo
Thanks for the feedback Ingo. I'll test the above config and look at
splitting up the patch. The difficulty is making each patch independently
compilable and testable.
Mike
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-05 16:02 ` Mike Travis
@ 2008-06-06 8:29 ` Jeremy Fitzhardinge
2008-06-06 13:15 ` Mike Travis
0 siblings, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-06 8:29 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, the arch/x86 maintainers
Mike Travis wrote:
> Ingo Molnar wrote:
>
>> * Mike Travis <travis@sgi.com> wrote:
>>
>>
>>> * Declare the pda as a per cpu variable.
>>>
>>> * Make the x86_64 per cpu area start at zero.
>>>
>>> * Since the pda is now the first element of the per_cpu area, cpu_pda()
>>> is no longer needed and per_cpu() can be used instead. This also makes
>>> the _cpu_pda[] table obsolete.
>>>
>>> * Since %gs is pointing to the pda, it will then also point to the per cpu
>>> variables and can be accessed thusly:
>>>
>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>
>>> Based on linux-2.6.tip
>>>
>> -tip testing found an instantaneous reboot crash on 64-bit x86, with
>> this config:
>>
>> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>>
>> there is no boot log as the instantaneous reboot happens before anything
>> is printed to the (early-) serial console. I have bisected it down to:
>>
>> | 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f is first bad commit
>> | commit 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f
>> | Author: Mike Travis <travis@sgi.com>
>> | Date: Tue Jun 3 17:30:21 2008 -0700
>> |
>> | x86_64: Fold pda into per cpu area
>>
>> the big problem is not just this crash, but that the patch is _way_ too
>> big:
>>
>> arch/x86/Kconfig | 3 +
>> arch/x86/kernel/head64.c | 34 ++++++--------
>> arch/x86/kernel/irq_64.c | 36 ++++++++-------
>> arch/x86/kernel/setup.c | 90 ++++++++++++---------------------------
>> arch/x86/kernel/setup64.c | 5 --
>> arch/x86/kernel/smpboot.c | 51 ----------------------
>> arch/x86/kernel/traps_64.c | 11 +++-
>> arch/x86/kernel/vmlinux_64.lds.S | 1
>> include/asm-x86/percpu.h | 48 ++++++--------------
>> 9 files changed, 89 insertions(+), 190 deletions(-)
>>
>> considering the danger involved, this is just way too large, and there's
>> no reasonable debugging i can do in the bisection to narrow it down any
>> further.
>>
>> Please resubmit with the bug fixed and with a proper splitup, the more
>> patches you manage to create, the better. For a dangerous code area like
>> this, with a track record of frequent breakages in the past, i would not
>> mind a "one line of code changed per patch" splitup either. (Feel free
>> to send a git tree link for us to try as well.)
>>
>> Ingo
>>
>
> Thanks for the feedback Ingo. I'll test the above config and look at
> splitting up the patch. The difficulty is making each patch independently
> compilable and testable.
FWIW, I'm getting past the "crashes very, very early" stage with this
series applied when booting under Xen. Then it crashes pretty early,
but that's not your fault...
J
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-06 8:29 ` Jeremy Fitzhardinge
@ 2008-06-06 13:15 ` Mike Travis
2008-06-18 5:34 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-06 13:15 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, the arch/x86 maintainers
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> Ingo Molnar wrote:
>>
>>> * Mike Travis <travis@sgi.com> wrote:
>>>
>>>
>>>> * Declare the pda as a per cpu variable.
>>>>
>>>> * Make the x86_64 per cpu area start at zero.
>>>>
>>>> * Since the pda is now the first element of the per_cpu area,
>>>> cpu_pda()
>>>> is no longer needed and per_cpu() can be used instead. This
>>>> also makes
>>>> the _cpu_pda[] table obsolete.
>>>>
>>>> * Since %gs is pointing to the pda, it will then also point to the
>>>> per cpu
>>>> variables and can be accessed thusly:
>>>>
>>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>>
>>>> Based on linux-2.6.tip
>>>>
>>> -tip testing found an instantaneous reboot crash on 64-bit x86, with
>>> this config:
>>>
>>> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>>>
>>> there is no boot log as the instantaneous reboot happens before
>>> anything is printed to the (early-) serial console. I have bisected
>>> it down to:
>>>
>>> | 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f is first bad commit
>>> | commit 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f
>>> | Author: Mike Travis <travis@sgi.com>
>>> | Date: Tue Jun 3 17:30:21 2008 -0700
>>> |
>>> | x86_64: Fold pda into per cpu area
>>>
>>> the big problem is not just this crash, but that the patch is _way_
>>> too big:
>>>
>>> arch/x86/Kconfig | 3 +
>>> arch/x86/kernel/head64.c | 34 ++++++--------
>>> arch/x86/kernel/irq_64.c | 36 ++++++++-------
>>> arch/x86/kernel/setup.c | 90
>>> ++++++++++++---------------------------
>>> arch/x86/kernel/setup64.c | 5 --
>>> arch/x86/kernel/smpboot.c | 51 ----------------------
>>> arch/x86/kernel/traps_64.c | 11 +++-
>>> arch/x86/kernel/vmlinux_64.lds.S | 1
>>> include/asm-x86/percpu.h | 48 ++++++--------------
>>> 9 files changed, 89 insertions(+), 190 deletions(-)
>>>
>>> considering the danger involved, this is just way too large, and
>>> there's no reasonable debugging i can do in the bisection to narrow
>>> it down any further.
>>>
>>> Please resubmit with the bug fixed and with a proper splitup, the
>>> more patches you manage to create, the better. For a dangerous code
>>> area like this, with a track record of frequent breakages in the
>>> past, i would not mind a "one line of code changed per patch" splitup
>>> either. (Feel free to send a git tree link for us to try as well.)
>>>
>>> Ingo
>>>
>>
>> Thanks for the feedback Ingo. I'll test the above config and look at
>> splitting up the patch. The difficulty is making each patch
>> independently
>> compilable and testable.
>
> FWIW, I'm getting past the "crashes very, very early" stage with this
> series applied when booting under Xen. Then it crashes pretty early,
> but that's not your fault...
>
> J
Hi Jeremy,
Yes we have a simulator for Nehalem that also breezes past the boot-up
problem (actually makes it to the kernel login prompt.) Weirdly, the
problem doesn't exist in an earlier code base so my changes are tickling
something else newly introduced. I'm attempting to see if I can use
GRUB 2 with the GDB stubs to track it down (which is time consuming in
itself to set up.)
It is definitely related to basing percpu variable offsets from %gs and
(I think) interrupts.
Thanks,
Mike
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-06 13:15 ` Mike Travis
@ 2008-06-18 5:34 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-18 5:34 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, the arch/x86 maintainers
Mike Travis wrote:
> Jeremy Fitzhardinge wrote:
>
>> Mike Travis wrote:
>>
>>> Ingo Molnar wrote:
>>>
>>>
>>>> * Mike Travis <travis@sgi.com> wrote:
>>>>
>>>>
>>>>
>>>>> * Declare the pda as a per cpu variable.
>>>>>
>>>>> * Make the x86_64 per cpu area start at zero.
>>>>>
>>>>> * Since the pda is now the first element of the per_cpu area,
>>>>> cpu_pda()
>>>>> is no longer needed and per_cpu() can be used instead. This
>>>>> also makes
>>>>> the _cpu_pda[] table obsolete.
>>>>>
>>>>> * Since %gs is pointing to the pda, it will then also point to the
>>>>> per cpu
>>>>> variables and can be accessed thusly:
>>>>>
>>>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>>>
>>>>> Based on linux-2.6.tip
>>>>>
>>>>>
>>>> -tip testing found an instantaneous reboot crash on 64-bit x86, with
>>>> this config:
>>>>
>>>> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>>>>
>>>> there is no boot log as the instantaneous reboot happens before
>>>> anything is printed to the (early-) serial console. I have bisected
>>>> it down to:
>>>>
>>>> | 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f is first bad commit
>>>> | commit 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f
>>>> | Author: Mike Travis <travis@sgi.com>
>>>> | Date: Tue Jun 3 17:30:21 2008 -0700
>>>> |
>>>> | x86_64: Fold pda into per cpu area
>>>>
>>>> the big problem is not just this crash, but that the patch is _way_
>>>> too big:
>>>>
>>>> arch/x86/Kconfig | 3 +
>>>> arch/x86/kernel/head64.c | 34 ++++++--------
>>>> arch/x86/kernel/irq_64.c | 36 ++++++++-------
>>>> arch/x86/kernel/setup.c | 90
>>>> ++++++++++++---------------------------
>>>> arch/x86/kernel/setup64.c | 5 --
>>>> arch/x86/kernel/smpboot.c | 51 ----------------------
>>>> arch/x86/kernel/traps_64.c | 11 +++-
>>>> arch/x86/kernel/vmlinux_64.lds.S | 1
>>>> include/asm-x86/percpu.h | 48 ++++++--------------
>>>> 9 files changed, 89 insertions(+), 190 deletions(-)
>>>>
>>>> considering the danger involved, this is just way too large, and
>>>> there's no reasonable debugging i can do in the bisection to narrow
>>>> it down any further.
>>>>
>>>> Please resubmit with the bug fixed and with a proper splitup, the
>>>> more patches you manage to create, the better. For a dangerous code
>>>> area like this, with a track record of frequent breakages in the
>>>> past, i would not mind a "one line of code changed per patch" splitup
>>>> either. (Feel free to send a git tree link for us to try as well.)
>>>>
>>>> Ingo
>>>>
>>>>
>>> Thanks for the feedback Ingo. I'll test the above config and look at
>>> splitting up the patch. The difficulty is making each patch
>>> independently
>>> compilable and testable.
>>>
>> FWIW, I'm getting past the "crashes very, very early" stage with this
>> series applied when booting under Xen. Then it crashes pretty early,
>> but that's not your fault...
>>
>> J
>>
>
> Hi Jeremy,
>
> Yes we have a simulator for Nahelem that also breezes past the boot up
> problem (actually makes it to the kernel login prompt.) Weirdly, the
> problem doesn't exist in an earlier code base so my changes are tickling
> something else newly introduced. I'm attempting to see if I can use
> GRUB 2 with the GDB stubs to track it down (which is time consuming in
> itself to setup.)
>
> It is definitely related to basing percpu variable offsets from %gs and
> (I think) interrupts.
>
Hi Mike,
Have you made any progress on this? I'm bumping up against it when I
run on native hardware (as opposed to under Xen).
J
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-05 10:22 ` [crash, bisected] " Ingo Molnar
2008-06-05 16:02 ` Mike Travis
@ 2008-06-10 21:31 ` Mike Travis
2008-06-18 17:36 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-10 21:31 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel, the arch/x86 maintainers
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> * Declare the pda as a per cpu variable.
>>
>> * Make the x86_64 per cpu area start at zero.
>>
>> * Since the pda is now the first element of the per_cpu area, cpu_pda()
>> is no longer needed and per_cpu() can be used instead. This also makes
>> the _cpu_pda[] table obsolete.
>>
>> * Since %gs is pointing to the pda, it will then also point to the per cpu
>> variables and can be accessed thusly:
>>
>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>
>> Based on linux-2.6.tip
>
> -tip testing found an instantaneous reboot crash on 64-bit x86, with
> this config:
>
> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
I'm still stuck on this one. One new development is that the current -tip
branch without the patches boots to the kernel prompt then hangs after a few
moments and then reboots. It seems you can tickle it using ^C to abort a
process.
-Mike
>
> there is no boot log as the instantaneous reboot happens before anything
> is printed to the (early-) serial console. I have bisected it down to:
>
> | 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f is first bad commit
> | commit 7670dc09e89a2b151a1cf49eccebc07c41c2ce9f
> | Author: Mike Travis <travis@sgi.com>
> | Date: Tue Jun 3 17:30:21 2008 -0700
> |
> | x86_64: Fold pda into per cpu area
>
> the big problem is not just this crash, but that the patch is _way_ too
> big:
>
> arch/x86/Kconfig | 3 +
> arch/x86/kernel/head64.c | 34 ++++++--------
> arch/x86/kernel/irq_64.c | 36 ++++++++-------
> arch/x86/kernel/setup.c | 90 ++++++++++++---------------------------
> arch/x86/kernel/setup64.c | 5 --
> arch/x86/kernel/smpboot.c | 51 ----------------------
> arch/x86/kernel/traps_64.c | 11 +++-
> arch/x86/kernel/vmlinux_64.lds.S | 1
> include/asm-x86/percpu.h | 48 ++++++--------------
> 9 files changed, 89 insertions(+), 190 deletions(-)
>
> considering the danger involved, this is just way too large, and there's
> no reasonable debugging i can do in the bisection to narrow it down any
> further.
>
> Please resubmit with the bug fixed and with a proper splitup, the more
> patches you manage to create, the better. For a dangerous code area like
> this, with a track record of frequent breakages in the past, i would not
> mind a "one line of code changed per patch" splitup either. (Feel free
> to send a git tree link for us to try as well.)
>
> Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-10 21:31 ` Mike Travis
@ 2008-06-18 17:36 ` Jeremy Fitzhardinge
2008-06-18 18:17 ` Mike Travis
0 siblings, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-18 17:36 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, the arch/x86 maintainers
Mike Travis wrote:
> Ingo Molnar wrote:
>
>> * Mike Travis <travis@sgi.com> wrote:
>>
>>
>>> * Declare the pda as a per cpu variable.
>>>
>>> * Make the x86_64 per cpu area start at zero.
>>>
>>> * Since the pda is now the first element of the per_cpu area, cpu_pda()
>>> is no longer needed and per_cpu() can be used instead. This also makes
>>> the _cpu_pda[] table obsolete.
>>>
>>> * Since %gs is pointing to the pda, it will then also point to the per cpu
>>> variables and can be accessed thusly:
>>>
>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>
>>> Based on linux-2.6.tip
>>>
>> -tip testing found an instantaneous reboot crash on 64-bit x86, with
>> this config:
>>
>> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>>
>
> I'm still stuck on this one. One new development is that the current -tip
> branch without the patches boots to the kernel prompt then hangs after a few
> moments and then reboots. It seems you can tickle it using ^C to abort a
> process.
Hi Mike,
I added some instrumentation to Xen to print the cpu state on
triple-fault, which highlights an obvious-looking problem.
(XEN) hvm.c:767:d1 Triple fault on VCPU0 - invoking HVM system reset.
(XEN) ----[ Xen-3.3-unstable x86_64 debug=y Not tainted ]----
(XEN) CPU: 1
(XEN) RIP: 0010:[<ffffffff80200160>]
(XEN) RFLAGS: 0000000000010002 CONTEXT: hvm
(XEN) rax: 0000000000000018 rbx: 0000000000000000 rcx: 00000000c0000080
(XEN) rdx: 0000000000000000 rsi: 0000000000092f40 rdi: 0000000020100800
(XEN) rbp: 0000000000000000 rsp: ffffffff807dfff8 r8: 0000000000208000
(XEN) r9: 0000000000000000 r10: 0000000000000000 r11: 00000000000000de
(XEN) r12: 0000000000000000 r13: 0000000000000000 r14: 0000000000000000
(XEN) r15: 0000000000000000 cr0: 0000000080050033 cr4: 00000000000000a0
(XEN) cr3: 0000000000201000 cr2: 0000000000000000
(XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: 0010
The rip is:
(gdb) x/i 0xffffffff80200160
0xffffffff80200160 <secondary_startup_64+96>: movl %eax,%ds
which is:
lgdt early_gdt_descr(%rip)
/* set up data segments. actually 0 would do too */
movl $__KERNEL_DS,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
And early_gdt_descr is:
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
.quad per_cpu__gdt_page
and per_cpu__gdt_page is zero-based, and therefore not a directly
addressable symbol.
I tried this patch, but it didn't work. Perhaps I'm missing something.
diff -r bf5a46e13f78 arch/x86/kernel/head_64.S
--- a/arch/x86/kernel/head_64.S Tue Jun 17 22:10:51 2008 -0700
+++ b/arch/x86/kernel/head_64.S Wed Jun 18 10:34:24 2008 -0700
@@ -94,6 +94,8 @@
addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
+ addq $__per_cpu_load, early_gdt_descr+2(%rip)
+
/* Add an Identity mapping if I am above 1G */
leaq _text(%rip), %rdi
andq $PMD_PAGE_MASK, %rdi
J
^ permalink raw reply [flat|nested] 119+ messages in thread* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-18 17:36 ` Jeremy Fitzhardinge
@ 2008-06-18 18:17 ` Mike Travis
2008-06-18 18:33 ` Ingo Molnar
2008-06-18 19:33 ` Jeremy Fitzhardinge
0 siblings, 2 replies; 119+ messages in thread
From: Mike Travis @ 2008-06-18 18:17 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, the arch/x86 maintainers
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> Ingo Molnar wrote:
>>
>>> * Mike Travis <travis@sgi.com> wrote:
>>>
>>>
>>>> * Declare the pda as a per cpu variable.
>>>>
>>>> * Make the x86_64 per cpu area start at zero.
>>>>
>>>> * Since the pda is now the first element of the per_cpu area,
>>>> cpu_pda()
>>>> is no longer needed and per_cpu() can be used instead. This
>>>> also makes
>>>> the _cpu_pda[] table obsolete.
>>>>
>>>> * Since %gs is pointing to the pda, it will then also point to the
>>>> per cpu
>>>> variables and can be accessed thusly:
>>>>
>>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>>
>>>> Based on linux-2.6.tip
>>>>
>>> -tip testing found an instantaneous reboot crash on 64-bit x86, with
>>> this config:
>>>
>>> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>>>
>>
>> I'm still stuck on this one. One new development is that the current
>> -tip
>> branch without the patches boots to the kernel prompt then hangs after
>> a few
>> moments and then reboots. It seems you can tickle it using ^C to abort a
>> process.
>
> Hi Mike,
>
> I added some instrumentation to Xen to print the cpu state on
> triple-fault, which highlights an obvious-looking problem.
>
> (XEN) hvm.c:767:d1 Triple fault on VCPU0 - invoking HVM system reset.
> (XEN) ----[ Xen-3.3-unstable x86_64 debug=y Not tainted ]----
> (XEN) CPU: 1
> (XEN) RIP: 0010:[<ffffffff80200160>]
> (XEN) RFLAGS: 0000000000010002 CONTEXT: hvm
> (XEN) rax: 0000000000000018 rbx: 0000000000000000 rcx: 00000000c0000080
> (XEN) rdx: 0000000000000000 rsi: 0000000000092f40 rdi: 0000000020100800
> (XEN) rbp: 0000000000000000 rsp: ffffffff807dfff8 r8: 0000000000208000
> (XEN) r9: 0000000000000000 r10: 0000000000000000 r11: 00000000000000de
> (XEN) r12: 0000000000000000 r13: 0000000000000000 r14: 0000000000000000
> (XEN) r15: 0000000000000000 cr0: 0000000080050033 cr4: 00000000000000a0
> (XEN) cr3: 0000000000201000 cr2: 0000000000000000
> (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: 0010
>
> The rip is:
>
> (gdb) x/i 0xffffffff80200160
> 0xffffffff80200160 <secondary_startup_64+96>: movl %eax,%ds
>
> which is:
>
> lgdt early_gdt_descr(%rip)
>
> /* set up data segments. actually 0 would do too */
> movl $__KERNEL_DS,%eax
> movl %eax,%ds
> movl %eax,%ss
> movl %eax,%es
>
> And early_gdt_descr is:
>
> .globl early_gdt_descr
> early_gdt_descr:
> .word GDT_ENTRIES*8-1
> .quad per_cpu__gdt_page
>
> and per_cpu__gdt_page is zero-based, and therefore not a directly
> addressable symbol.
>
> I tried this patch, but it didn't work. Perhaps I'm missing something.
>
> diff -r bf5a46e13f78 arch/x86/kernel/head_64.S
> --- a/arch/x86/kernel/head_64.S Tue Jun 17 22:10:51 2008 -0700
> +++ b/arch/x86/kernel/head_64.S Wed Jun 18 10:34:24 2008 -0700
> @@ -94,6 +94,8 @@
>
> addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
>
> + addq $__per_cpu_load, early_gdt_descr+2(%rip)
> +
> /* Add an Identity mapping if I am above 1G */
> leaq _text(%rip), %rdi
> andq $PMD_PAGE_MASK, %rdi
>
>
> J
Hi Jeremy,
I'm not finding that code in the tip/latest or linux-next branches... ?
I can send you my latest version of the patch which is better than
the previous but still is having problems with the config file that
Ingo sent out. (It also has a weird quirk that it will hang and
reboot after about 30 seconds with or without my patch.)
Thanks,
Mike
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-18 18:17 ` Mike Travis
@ 2008-06-18 18:33 ` Ingo Molnar
2008-06-18 19:33 ` Jeremy Fitzhardinge
1 sibling, 0 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-06-18 18:33 UTC (permalink / raw)
To: Mike Travis
Cc: Jeremy Fitzhardinge, Andrew Morton, Christoph Lameter,
David Miller, Eric Dumazet, linux-kernel,
the arch/x86 maintainers
* Mike Travis <travis@sgi.com> wrote:
> Hi Jeremy,
>
> I'm not finding that code in the tip/latest or linux-next branches...
> ?
>
> I can send you my latest version of the patch which is better than the
> previous but still is having problems with the config file that Ingo
> sent out. (It also has a weird quirk that it will hang and reboot
> after about 30 seconds with or without my patch.)
the patch is not in -tip yet because we don't keep known-broken patches
applied unless there's some really strong reason to do so.
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [crash, bisected] Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-06-18 18:17 ` Mike Travis
2008-06-18 18:33 ` Ingo Molnar
@ 2008-06-18 19:33 ` Jeremy Fitzhardinge
[not found] ` <48596893.4040908@sgi.com>
1 sibling, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-18 19:33 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel, the arch/x86 maintainers
Mike Travis wrote:
> Jeremy Fitzhardinge wrote:
>
>> Mike Travis wrote:
>>
>>> Ingo Molnar wrote:
>>>
>>>
>>>> * Mike Travis <travis@sgi.com> wrote:
>>>>
>>>>
>>>>
>>>>> * Declare the pda as a per cpu variable.
>>>>>
>>>>> * Make the x86_64 per cpu area start at zero.
>>>>>
>>>>> * Since the pda is now the first element of the per_cpu area,
>>>>> cpu_pda()
>>>>> is no longer needed and per_cpu() can be used instead. This
>>>>> also makes
>>>>> the _cpu_pda[] table obsolete.
>>>>>
>>>>> * Since %gs is pointing to the pda, it will then also point to the
>>>>> per cpu
>>>>> variables and can be accessed thusly:
>>>>>
>>>>> %gs:[&per_cpu_xxxx - __per_cpu_start]
>>>>>
>>>>> Based on linux-2.6.tip
>>>>>
>>>>>
>>>> -tip testing found an instantaneous reboot crash on 64-bit x86, with
>>>> this config:
>>>>
>>>> http://redhat.com/~mingo/misc/config-Thu_Jun__5_11_43_51_CEST_2008.bad
>>>>
>>>>
>>> I'm still stuck on this one. One new development is that the current
>>> -tip
>>> branch without the patches boots to the kernel prompt then hangs after
>>> a few
>>> moments and then reboots. It seems you can tickle it using ^C to abort a
>>> process.
>>>
>> Hi Mike,
>>
>> I added some instrumentation to Xen to print the cpu state on
>> triple-fault, which highlights an obvious-looking problem.
>>
>> (XEN) hvm.c:767:d1 Triple fault on VCPU0 - invoking HVM system reset.
>> (XEN) ----[ Xen-3.3-unstable x86_64 debug=y Not tainted ]----
>> (XEN) CPU: 1
>> (XEN) RIP: 0010:[<ffffffff80200160>]
>> (XEN) RFLAGS: 0000000000010002 CONTEXT: hvm
>> (XEN) rax: 0000000000000018 rbx: 0000000000000000 rcx: 00000000c0000080
>> (XEN) rdx: 0000000000000000 rsi: 0000000000092f40 rdi: 0000000020100800
>> (XEN) rbp: 0000000000000000 rsp: ffffffff807dfff8 r8: 0000000000208000
>> (XEN) r9: 0000000000000000 r10: 0000000000000000 r11: 00000000000000de
>> (XEN) r12: 0000000000000000 r13: 0000000000000000 r14: 0000000000000000
>> (XEN) r15: 0000000000000000 cr0: 0000000080050033 cr4: 00000000000000a0
>> (XEN) cr3: 0000000000201000 cr2: 0000000000000000
>> (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: 0010
>>
>> The rip is:
>>
>> (gdb) x/i 0xffffffff80200160
>> 0xffffffff80200160 <secondary_startup_64+96>: movl %eax,%ds
>>
>> which is:
>>
>> lgdt early_gdt_descr(%rip)
>>
>> /* set up data segments. actually 0 would do too */
>> movl $__KERNEL_DS,%eax
>> movl %eax,%ds
>> movl %eax,%ss
>> movl %eax,%es
>>
>> And early_gdt_descr is:
>>
>> .globl early_gdt_descr
>> early_gdt_descr:
>> .word GDT_ENTRIES*8-1
>> .quad per_cpu__gdt_page
>>
>> and per_cpu__gdt_page is zero-based, and therefore not a directly
>> addressable symbol.
>>
>> I tried this patch, but it didn't work. Perhaps I'm missing something.
>>
>> diff -r bf5a46e13f78 arch/x86/kernel/head_64.S
>> --- a/arch/x86/kernel/head_64.S Tue Jun 17 22:10:51 2008 -0700
>> +++ b/arch/x86/kernel/head_64.S Wed Jun 18 10:34:24 2008 -0700
>> @@ -94,6 +94,8 @@
>>
>> addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
>>
>> + addq $__per_cpu_load, early_gdt_descr+2(%rip)
>> +
>> /* Add an Identity mapping if I am above 1G */
>> leaq _text(%rip), %rdi
>> andq $PMD_PAGE_MASK, %rdi
>>
>>
>> J
>>
>
> Hi Jeremy,
>
> I'm not finding that code in the tip/latest or linux-next branches... ?
>
You mean your percpu/pda code? No, I'm carrying it locally because I
need it as a base for my Xen work. Xen bypasses these early boot
stages, so I haven't seen any problems so far.
But I'd also like to make sure that my Xen changes don't break native
boots, too...
> I can send you my latest version of the patch which is better than
> the previous but still is having problems with the config file that
> Ingo sent out. (It also has a weird quirk that it will hang and
> reboot after about 30 seconds with or without my patch.)
>
Yes, keep me up to date with the percpu work.
J
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-04 0:30 [PATCH 0/4] percpu: Optimize percpu accesses Mike Travis
` (2 preceding siblings ...)
2008-06-04 0:30 ` [PATCH 3/4] x86_64: Fold pda into per cpu area Mike Travis
@ 2008-06-04 0:30 ` Mike Travis
2008-06-09 13:03 ` Ingo Molnar
2008-06-04 10:18 ` [PATCH] x86: collapse the various size-dependent percpu accessors together Jeremy Fitzhardinge
4 siblings, 1 reply; 119+ messages in thread
From: Mike Travis @ 2008-06-04 0:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
[-- Attachment #1: zero_based_replace_pda_operations --]
[-- Type: text/plain, Size: 14788 bytes --]
* It is now possible to use percpu operations for pda access
since the pda is in the percpu area. Drop the pda operations.
Based on linux-2.6.tip
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/kernel/apic_64.c | 4 -
arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2
arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2
arch/x86/kernel/nmi.c | 5 +
arch/x86/kernel/process_64.c | 12 ++--
arch/x86/kernel/smp.c | 4 -
arch/x86/kernel/time_64.c | 2
arch/x86/kernel/tlb_64.c | 12 ++--
arch/x86/kernel/traps_64.c | 2
arch/x86/kernel/x8664_ksyms_64.c | 2
arch/x86/xen/smp.c | 2
include/asm-x86/current.h | 3 -
include/asm-x86/hardirq_64.h | 6 +-
include/asm-x86/mmu_context_64.h | 12 ++--
include/asm-x86/pda.h | 80 ++----------------------------
include/asm-x86/smp.h | 2
include/asm-x86/stackprotector.h | 2
include/asm-x86/thread_info.h | 3 -
include/asm-x86/topology.h | 2
19 files changed, 47 insertions(+), 112 deletions(-)
--- linux-2.6.tip.orig/arch/x86/kernel/apic_64.c
+++ linux-2.6.tip/arch/x86/kernel/apic_64.c
@@ -481,7 +481,7 @@ static void local_apic_timer_interrupt(v
/*
* the NMI deadlock-detector uses this.
*/
- add_pda(apic_timer_irqs, 1);
+ x86_inc_percpu(pda.apic_timer_irqs);
evt->event_handler(evt);
}
@@ -986,7 +986,7 @@ asmlinkage void smp_spurious_interrupt(v
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
ack_APIC_irq();
- add_pda(irq_spurious_count, 1);
+ x86_inc_percpu(pda.irq_spurious_count);
irq_exit();
}
--- linux-2.6.tip.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ linux-2.6.tip/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(
}
}
out:
- add_pda(irq_threshold_count, 1);
+ x86_inc_percpu(pda.irq_threshold_count);
irq_exit();
}
--- linux-2.6.tip.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ linux-2.6.tip/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(vo
if (therm_throt_process(msr_val & 1))
mce_log_therm_throt_event(smp_processor_id(), msr_val);
- add_pda(irq_thermal_count, 1);
+ x86_inc_percpu(pda.irq_thermal_count);
irq_exit();
}
--- linux-2.6.tip.orig/arch/x86/kernel/nmi.c
+++ linux-2.6.tip/arch/x86/kernel/nmi.c
@@ -56,7 +56,7 @@ static int endflag __initdata = 0;
static inline unsigned int get_nmi_count(int cpu)
{
#ifdef CONFIG_X86_64
- return cpu_pda(cpu)->__nmi_count;
+ return x86_read_percpu(pda.__nmi_count);
#else
return nmi_count(cpu);
#endif
@@ -77,7 +77,8 @@ static inline int mce_in_progress(void)
static inline unsigned int get_timer_irqs(int cpu)
{
#ifdef CONFIG_X86_64
- return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
+ return x86_read_percpu(pda.apic_timer_irqs) +
+ x86_read_percpu(pda.irq0_irqs);
#else
return per_cpu(irq_stat, cpu).apic_timer_irqs +
per_cpu(irq_stat, cpu).irq0_irqs;
--- linux-2.6.tip.orig/arch/x86/kernel/process_64.c
+++ linux-2.6.tip/arch/x86/kernel/process_64.c
@@ -75,7 +75,7 @@ void idle_notifier_register(struct notif
void enter_idle(void)
{
- write_pda(isidle, 1);
+ x86_write_percpu(pda.isidle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
@@ -438,7 +438,7 @@ start_thread(struct pt_regs *regs, unsig
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
- write_pda(oldrsp, new_sp);
+ x86_write_percpu(pda.oldrsp, new_sp);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
regs->flags = 0x200;
@@ -674,11 +674,11 @@ __switch_to(struct task_struct *prev_p,
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = read_pda(oldrsp);
- write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ prev->usersp = x86_read_percpu(pda.oldrsp);
+ x86_write_percpu(pda.oldrsp, next->usersp);
+ x86_write_percpu(pda.pcurrent, next_p);
- write_pda(kernelstack,
+ x86_write_percpu(pda.kernelstack,
(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
/*
--- linux-2.6.tip.orig/arch/x86/kernel/smp.c
+++ linux-2.6.tip/arch/x86/kernel/smp.c
@@ -295,7 +295,7 @@ void smp_reschedule_interrupt(struct pt_
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_resched_count++;
#else
- add_pda(irq_resched_count, 1);
+ x86_inc_percpu(pda.irq_resched_count);
#endif
}
@@ -320,7 +320,7 @@ void smp_call_function_interrupt(struct
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
- add_pda(irq_call_count, 1);
+ x86_inc_percpu(pda.irq_call_count);
#endif
irq_exit();
--- linux-2.6.tip.orig/arch/x86/kernel/time_64.c
+++ linux-2.6.tip/arch/x86/kernel/time_64.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(profile_pc);
static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
{
- add_pda(irq0_irqs, 1);
+ x86_inc_percpu(pda.irq0_irqs);
global_clock_event->event_handler(global_clock_event);
--- linux-2.6.tip.orig/arch/x86/kernel/tlb_64.c
+++ linux-2.6.tip/arch/x86/kernel/tlb_64.c
@@ -60,9 +60,9 @@ static DEFINE_PER_CPU(union smp_flush_st
*/
void leave_mm(int cpu)
{
- if (read_pda(mmu_state) == TLBSTATE_OK)
+ if (x86_read_percpu(pda.mmu_state) == TLBSTATE_OK)
BUG();
- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+ cpu_clear(cpu, x86_read_percpu(pda.active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -140,8 +140,8 @@ asmlinkage void smp_invalidate_interrupt
* BUG();
*/
- if (f->flush_mm == read_pda(active_mm)) {
- if (read_pda(mmu_state) == TLBSTATE_OK) {
+ if (f->flush_mm == x86_read_percpu(pda.active_mm)) {
+ if (x86_read_percpu(pda.mmu_state) == TLBSTATE_OK) {
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
@@ -152,7 +152,7 @@ asmlinkage void smp_invalidate_interrupt
out:
ack_APIC_irq();
cpu_clear(cpu, f->flush_cpumask);
- add_pda(irq_tlb_count, 1);
+ x86_inc_percpu(pda.irq_tlb_count);
}
void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@ -264,7 +264,7 @@ static void do_flush_tlb_all(void *info)
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
- if (read_pda(mmu_state) == TLBSTATE_LAZY)
+ if (x86_read_percpu(pda.mmu_state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
--- linux-2.6.tip.orig/arch/x86/kernel/traps_64.c
+++ linux-2.6.tip/arch/x86/kernel/traps_64.c
@@ -878,7 +878,7 @@ asmlinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
nmi_enter();
- add_pda(__nmi_count, 1);
+ x86_inc_percpu(pda.__nmi_count);
if (!ignore_nmis)
default_do_nmi(regs);
nmi_exit();
--- linux-2.6.tip.orig/arch/x86/kernel/x8664_ksyms_64.c
+++ linux-2.6.tip/arch/x86/kernel/x8664_ksyms_64.c
@@ -59,8 +59,6 @@ EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(init_level4_pgt);
EXPORT_SYMBOL(load_gs_index);
-EXPORT_SYMBOL(_proxy_pda);
-
#ifdef CONFIG_PARAVIRT
/* Virtualized guests may want to use it */
EXPORT_SYMBOL_GPL(cpu_gdt_descr);
--- linux-2.6.tip.orig/arch/x86/xen/smp.c
+++ linux-2.6.tip/arch/x86/xen/smp.c
@@ -68,7 +68,7 @@ static irqreturn_t xen_reschedule_interr
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_resched_count++;
#else
- add_pda(irq_resched_count, 1);
+ x86_inc_percpu(pda.irq_resched_count);
#endif
return IRQ_HANDLED;
--- linux-2.6.tip.orig/include/asm-x86/current.h
+++ linux-2.6.tip/include/asm-x86/current.h
@@ -17,12 +17,13 @@ static __always_inline struct task_struc
#ifndef __ASSEMBLY__
#include <asm/pda.h>
+#include <asm/percpu.h>
struct task_struct;
static __always_inline struct task_struct *get_current(void)
{
- return read_pda(pcurrent);
+ return x86_read_percpu(pda.pcurrent);
}
#else /* __ASSEMBLY__ */
--- linux-2.6.tip.orig/include/asm-x86/hardirq_64.h
+++ linux-2.6.tip/include/asm-x86/hardirq_64.h
@@ -11,12 +11,12 @@
#define __ARCH_IRQ_STAT 1
-#define local_softirq_pending() read_pda(__softirq_pending)
+#define local_softirq_pending() x86_read_percpu(pda.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING 1
-#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
-#define or_softirq_pending(x) or_pda(__softirq_pending, (x))
+#define set_softirq_pending(x) x86_write_percpu(pda.__softirq_pending, (x))
+#define or_softirq_pending(x) x86_or_percpu(pda.__softirq_pending, (x))
extern void ack_bad_irq(unsigned int irq);
--- linux-2.6.tip.orig/include/asm-x86/mmu_context_64.h
+++ linux-2.6.tip/include/asm-x86/mmu_context_64.h
@@ -20,8 +20,8 @@ void destroy_context(struct mm_struct *m
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
- if (read_pda(mmu_state) == TLBSTATE_OK)
- write_pda(mmu_state, TLBSTATE_LAZY);
+ if (x86_read_percpu(pda.mmu_state) == TLBSTATE_OK)
+ x86_write_percpu(pda.mmu_state, TLBSTATE_LAZY);
#endif
}
@@ -33,8 +33,8 @@ static inline void switch_mm(struct mm_s
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
- write_pda(mmu_state, TLBSTATE_OK);
- write_pda(active_mm, next);
+ x86_write_percpu(pda.mmu_state, TLBSTATE_OK);
+ x86_write_percpu(pda.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
load_cr3(next->pgd);
@@ -44,8 +44,8 @@ static inline void switch_mm(struct mm_s
}
#ifdef CONFIG_SMP
else {
- write_pda(mmu_state, TLBSTATE_OK);
- if (read_pda(active_mm) != next)
+ x86_write_percpu(pda.mmu_state, TLBSTATE_OK);
+ if (x86_read_percpu(pda.active_mm) != next)
BUG();
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
--- linux-2.6.tip.orig/include/asm-x86/pda.h
+++ linux-2.6.tip/include/asm-x86/pda.h
@@ -21,7 +21,7 @@ struct x8664_pda {
offset 40!!! */
char *irqstackptr;
short nodenumber; /* number of current node (32k max) */
- short in_bootmem; /* pda lives in bootmem */
+ short unused1; /* unused */
unsigned int __softirq_pending;
unsigned int __nmi_count; /* number of NMI on this CPUs */
short mmu_state;
@@ -37,17 +37,8 @@ struct x8664_pda {
unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
-extern struct x8664_pda **_cpu_pda;
extern void pda_init(int);
-#define cpu_pda(i) (_cpu_pda[i])
-
-/*
- * There is no fast way to get the base address of the PDA, all the accesses
- * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
- */
-extern void __bad_pda_field(void) __attribute__((noreturn));
-
/*
* proxy_pda doesn't actually exist, but tell gcc it is accessed for
* all PDA accesses so it gets read/write dependencies right.
@@ -56,69 +47,11 @@ extern struct x8664_pda _proxy_pda;
#define pda_offset(field) offsetof(struct x8664_pda, field)
-#define pda_to_op(op, field, val) \
-do { \
- typedef typeof(_proxy_pda.field) T__; \
- if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
- switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %1,%%gs:%c2" : \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- case 4: \
- asm(op "l %1,%%gs:%c2" : \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i" (pda_offset(field))); \
- break; \
- case 8: \
- asm(op "q %1,%%gs:%c2": \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- default: \
- __bad_pda_field(); \
- } \
-} while (0)
-
-#define pda_from_op(op, field) \
-({ \
- typeof(_proxy_pda.field) ret__; \
- switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %%gs:%c1,%0" : \
- "=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 4: \
- asm(op "l %%gs:%c1,%0": \
- "=r" (ret__): \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 8: \
- asm(op "q %%gs:%c1,%0": \
- "=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- default: \
- __bad_pda_field(); \
- } \
- ret__; \
-})
-
-#define read_pda(field) pda_from_op("mov", field)
-#define write_pda(field, val) pda_to_op("mov", field, val)
-#define add_pda(field, val) pda_to_op("add", field, val)
-#define sub_pda(field, val) pda_to_op("sub", field, val)
-#define or_pda(field, val) pda_to_op("or", field, val)
-
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
+/*
+ * This is not atomic against other CPUs -- CPU preemption needs to be off
+ * NOTE: This relies on the fact that the cpu_pda is the *first* field in
+ * the per cpu area. Move it and you'll need to change this.
+ */
#define test_and_clear_bit_pda(bit, field) \
({ \
int old__; \
@@ -128,6 +61,7 @@ do { \
old__; \
})
+
#endif
#define PDA_STACKOFFSET (5*8)
--- linux-2.6.tip.orig/include/asm-x86/smp.h
+++ linux-2.6.tip/include/asm-x86/smp.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(int, cpu_number);
extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() read_pda(cpunumber)
+#define raw_smp_processor_id() x86_read_percpu(pda.cpunumber)
#define stack_smp_processor_id() \
({ \
--- linux-2.6.tip.orig/include/asm-x86/stackprotector.h
+++ linux-2.6.tip/include/asm-x86/stackprotector.h
@@ -32,7 +32,7 @@ static __always_inline void boot_init_st
canary += tsc + (tsc << 32UL);
current->stack_canary = canary;
- write_pda(stack_canary, canary);
+ x86_write_percpu(pda.stack_canary, canary);
}
#endif
--- linux-2.6.tip.orig/include/asm-x86/thread_info.h
+++ linux-2.6.tip/include/asm-x86/thread_info.h
@@ -200,7 +200,8 @@ static inline struct thread_info *curren
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
+ ti = (void *)(x86_read_percpu(pda.kernelstack) +
+ PDA_STACKOFFSET - THREAD_SIZE);
return ti;
}
--- linux-2.6.tip.orig/include/asm-x86/topology.h
+++ linux-2.6.tip/include/asm-x86/topology.h
@@ -72,7 +72,7 @@ extern cpumask_t *node_to_cpumask_map;
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
/* Returns the number of the current Node. */
-#define numa_node_id() read_pda(nodenumber)
+#define numa_node_id() x86_read_percpu(pda.nodenumber)
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
extern int cpu_to_node(int cpu);
--
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-04 0:30 ` [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu() Mike Travis
@ 2008-06-09 13:03 ` Ingo Molnar
2008-06-09 16:08 ` Mike Travis
2008-06-09 17:36 ` Mike Travis
0 siblings, 2 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-06-09 13:03 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 233 bytes --]
* Mike Travis <travis@sgi.com> wrote:
> * It is now possible to use percpu operations for pda access
> since the pda is in the percpu area. Drop the pda operations.
FYI, this one didn't build with the attached config.
Ingo
[-- Attachment #2: config --]
[-- Type: text/plain, Size: 32775 bytes --]
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.26-rc5
# Mon Jun 9 14:59:39 2008
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
# CONFIG_GENERIC_LOCKBREAK is not set
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_FAST_CMPXCHG_LOCAL=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_HWEIGHT=y
# CONFIG_GENERIC_GPIO is not set
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ZONE_DMA32=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_AOUT=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
CONFIG_X86_64_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
# CONFIG_KTIME_SCALAR is not set
# CONFIG_BOOTPARAM_SUPPORT_WANTED is not set
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_BROKEN_BOOT_ALLOWED3=y
CONFIG_BROKEN_BOOT_ALLOWED2=y
CONFIG_BROKEN_BOOT_ALLOWED=y
CONFIG_BROKEN_BOOT=y
CONFIG_BROKEN_BOOT_EUROPE=y
CONFIG_BROKEN_BOOT_TITAN=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
# CONFIG_SYSVIPC is not set
# CONFIG_POSIX_MQUEUE is not set
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
# CONFIG_TASKSTATS is not set
# CONFIG_AUDIT is not set
# CONFIG_IKCONFIG is not set
CONFIG_LOG_BUF_SHIFT=20
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
# CONFIG_CGROUP_NS is not set
CONFIG_CGROUP_DEVICE=y
# CONFIG_CPUSETS is not set
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
# CONFIG_GROUP_SCHED is not set
# CONFIG_CGROUP_CPUACCT is not set
CONFIG_RESOURCE_COUNTERS=y
# CONFIG_CGROUP_MEM_RES_CTLR is not set
CONFIG_RELAY=y
# CONFIG_NAMESPACES is not set
# CONFIG_BLK_DEV_INITRD is not set
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EMBEDDED=y
# CONFIG_UID16 is not set
# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
# CONFIG_HOTPLUG is not set
# CONFIG_PRINTK is not set
# CONFIG_BUG is not set
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_COMPAT_BRK=y
CONFIG_BASE_FULL=y
# CONFIG_FUTEX is not set
CONFIG_ANON_INODES=y
# CONFIG_EPOLL is not set
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
# CONFIG_PROFILING is not set
CONFIG_MARKERS=y
CONFIG_HAVE_OPROFILE=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
# CONFIG_HAVE_DMA_ATTRS is not set
CONFIG_HAVE_IMMEDIATE=y
# CONFIG_IMMEDIATE is not set
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
# CONFIG_MODULES is not set
CONFIG_BLOCK=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_BLOCK_COMPAT=y
#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
# CONFIG_IOSCHED_AS is not set
# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_IOSCHED_CFQ=y
# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
CONFIG_CLASSIC_RCU=y
#
# Processor type and features
#
CONFIG_TICK_ONESHOT=y
# CONFIG_NO_HZ is not set
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP_SUPPORT=y
CONFIG_UP_WANTED_1=y
# CONFIG_UP_WANTED_2 is not set
CONFIG_SMP=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_RDC321X is not set
# CONFIG_X86_VSMP is not set
CONFIG_PARAVIRT_GUEST=y
# CONFIG_KVM_CLOCK is not set
# CONFIG_KVM_GUEST is not set
CONFIG_PARAVIRT=y
# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
CONFIG_MCORE2=y
# CONFIG_GENERIC_CPU is not set
CONFIG_X86_CPU=y
CONFIG_X86_L1_CACHE_BYTES=64
CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_P6_NOP=y
CONFIG_X86_TSC=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
# CONFIG_X86_DS is not set
CONFIG_HPET_TIMER=y
# CONFIG_DMI is not set
CONFIG_GART_IOMMU=y
# CONFIG_CALGARY_IOMMU is not set
CONFIG_SWIOTLB=y
CONFIG_IOMMU_HELPER=y
# CONFIG_MAXSMP is not set
CONFIG_NR_CPUS=8
# CONFIG_SCHED_SMT is not set
# CONFIG_SCHED_MC is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
CONFIG_I8K=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
# CONFIG_X86_CPUID is not set
# CONFIG_NUMA is not set
CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ILLEGAL_POINTER_VALUE=0xffffc10000000000
CONFIG_SELECT_MEMORY_MODEL=y
# CONFIG_FLATMEM_MANUAL is not set
# CONFIG_DISCONTIGMEM_MANUAL is not set
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_SPARSEMEM=y
CONFIG_HAVE_MEMORY_PRESENT=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
# CONFIG_SPARSEMEM_VMEMMAP is not set
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
# CONFIG_MTRR is not set
# CONFIG_CC_STACKPROTECTOR is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
CONFIG_HZ_300=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=300
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x200000
CONFIG_RELOCATABLE=y
CONFIG_PHYSICAL_ALIGN=0x200000
# CONFIG_COMPAT_VDSO is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
#
# Power management options
#
# CONFIG_PM is not set
#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
# CONFIG_CPU_FREQ_DEBUG is not set
CONFIG_CPU_FREQ_STAT=y
# CONFIG_CPU_FREQ_STAT_DETAILS is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
#
# CPUFreq processor drivers
#
CONFIG_X86_POWERNOW_K8=y
# CONFIG_X86_P4_CLOCKMOD is not set
#
# shared options
#
# CONFIG_X86_SPEEDSTEP_LIB is not set
# CONFIG_CPU_IDLE is not set
#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEAER is not set
# CONFIG_PCIEASPM is not set
CONFIG_ARCH_SUPPORTS_MSI=y
# CONFIG_PCI_MSI is not set
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_HT_IRQ is not set
CONFIG_ISA_DMA_API=y
CONFIG_K8_NB=y
#
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
CONFIG_COMPAT_BINFMT_ELF=y
# CONFIG_BINFMT_MISC is not set
CONFIG_IA32_EMULATION=y
CONFIG_IA32_AOUT=y
CONFIG_COMPAT=y
CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
#
# Networking
#
CONFIG_NET=y
#
# Networking options
#
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
# CONFIG_XFRM_SUB_POLICY is not set
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
# CONFIG_NET_KEY_MIGRATE is not set
# CONFIG_INET is not set
CONFIG_NETWORK_SECMARK=y
# CONFIG_NETFILTER is not set
CONFIG_ATM=y
CONFIG_ATM_LANE=y
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
CONFIG_DECNET=y
# CONFIG_DECNET_ROUTER is not set
CONFIG_LLC=y
CONFIG_LLC2=y
# CONFIG_IPX is not set
CONFIG_ATALK=y
# CONFIG_DEV_APPLETALK is not set
CONFIG_X25=y
# CONFIG_LAPB is not set
CONFIG_WAN_ROUTER=y
# CONFIG_NET_SCHED is not set
#
# Network testing
#
CONFIG_HAMRADIO=y
#
# Packet Radio protocols
#
# CONFIG_AX25 is not set
CONFIG_CAN=y
CONFIG_CAN_RAW=y
CONFIG_CAN_BCM=y
#
# CAN Device Drivers
#
# CONFIG_CAN_VCAN is not set
CONFIG_CAN_DEBUG_DEVICES=y
CONFIG_IRDA=y
#
# IrDA protocols
#
CONFIG_IRLAN=y
# CONFIG_IRCOMM is not set
# CONFIG_IRDA_ULTRA is not set
#
# IrDA options
#
CONFIG_IRDA_CACHE_LAST_LSAP=y
CONFIG_IRDA_FAST_RR=y
# CONFIG_IRDA_DEBUG is not set
#
# Infrared-port device drivers
#
#
# SIR device drivers
#
# CONFIG_IRTTY_SIR is not set
#
# Dongle support
#
#
# FIR device drivers
#
CONFIG_NSC_FIR=y
# CONFIG_WINBOND_FIR is not set
CONFIG_SMC_IRCC_FIR=y
CONFIG_ALI_FIR=y
# CONFIG_VLSI_FIR is not set
CONFIG_VIA_FIR=y
CONFIG_BT=y
# CONFIG_BT_L2CAP is not set
# CONFIG_BT_SCO is not set
#
# Bluetooth device drivers
#
# CONFIG_BT_HCIUART is not set
# CONFIG_BT_HCIVHCI is not set
#
# Wireless
#
# CONFIG_CFG80211 is not set
CONFIG_WIRELESS_EXT=y
# CONFIG_MAC80211 is not set
CONFIG_IEEE80211=y
# CONFIG_IEEE80211_DEBUG is not set
CONFIG_IEEE80211_CRYPT_WEP=y
# CONFIG_IEEE80211_CRYPT_CCMP is not set
CONFIG_IEEE80211_CRYPT_TKIP=y
CONFIG_RFKILL=y
CONFIG_RFKILL_INPUT=y
CONFIG_NET_9P=y
# CONFIG_NET_9P_DEBUG is not set
#
# Device Drivers
#
#
# Generic Driver Options
#
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_SYS_HYPERVISOR is not set
# CONFIG_CONNECTOR is not set
CONFIG_MTD=y
# CONFIG_MTD_DEBUG is not set
CONFIG_MTD_CONCAT=y
CONFIG_MTD_PARTITIONS=y
# CONFIG_MTD_REDBOOT_PARTS is not set
# CONFIG_MTD_CMDLINE_PARTS is not set
# CONFIG_MTD_AR7_PARTS is not set
#
# User Modules And Translation Layers
#
# CONFIG_MTD_CHAR is not set
CONFIG_MTD_BLKDEVS=y
CONFIG_MTD_BLOCK=y
# CONFIG_FTL is not set
CONFIG_NFTL=y
CONFIG_NFTL_RW=y
CONFIG_INFTL=y
# CONFIG_RFD_FTL is not set
CONFIG_SSFDC=y
# CONFIG_MTD_OOPS is not set
#
# RAM/ROM/Flash chip drivers
#
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_GEN_PROBE=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
# CONFIG_MTD_CFI_NOSWAP is not set
# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
CONFIG_MTD_CFI_LE_BYTE_SWAP=y
# CONFIG_MTD_CFI_GEOMETRY is not set
CONFIG_MTD_MAP_BANK_WIDTH_1=y
CONFIG_MTD_MAP_BANK_WIDTH_2=y
CONFIG_MTD_MAP_BANK_WIDTH_4=y
# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
CONFIG_MTD_CFI_I1=y
CONFIG_MTD_CFI_I2=y
# CONFIG_MTD_CFI_I4 is not set
# CONFIG_MTD_CFI_I8 is not set
CONFIG_MTD_OTP=y
# CONFIG_MTD_CFI_INTELEXT is not set
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_CFI_STAA=y
CONFIG_MTD_CFI_UTIL=y
# CONFIG_MTD_RAM is not set
CONFIG_MTD_ROM=y
# CONFIG_MTD_ABSENT is not set
#
# Mapping drivers for chip access
#
# CONFIG_MTD_COMPLEX_MAPPINGS is not set
# CONFIG_MTD_PHYSMAP is not set
# CONFIG_MTD_SC520CDP is not set
# CONFIG_MTD_NETSC520 is not set
CONFIG_MTD_TS5500=y
# CONFIG_MTD_AMD76XROM is not set
CONFIG_MTD_ICHXROM=y
# CONFIG_MTD_ESB2ROM is not set
# CONFIG_MTD_CK804XROM is not set
CONFIG_MTD_SCB2_FLASH=y
# CONFIG_MTD_NETtel is not set
CONFIG_MTD_L440GX=y
CONFIG_MTD_INTEL_VR_NOR=y
# CONFIG_MTD_PLATRAM is not set
#
# Self-contained MTD device drivers
#
# CONFIG_MTD_PMC551 is not set
# CONFIG_MTD_DATAFLASH is not set
# CONFIG_MTD_M25P80 is not set
CONFIG_MTD_SLRAM=y
CONFIG_MTD_PHRAM=y
# CONFIG_MTD_MTDRAM is not set
CONFIG_MTD_BLOCK2MTD=y
#
# Disk-On-Chip Device Drivers
#
# CONFIG_MTD_DOC2000 is not set
CONFIG_MTD_DOC2001=y
CONFIG_MTD_DOC2001PLUS=y
CONFIG_MTD_DOCPROBE=y
CONFIG_MTD_DOCECC=y
# CONFIG_MTD_DOCPROBE_ADVANCED is not set
CONFIG_MTD_DOCPROBE_ADDRESS=0
# CONFIG_MTD_NAND is not set
CONFIG_MTD_NAND_IDS=y
CONFIG_MTD_ONENAND=y
CONFIG_MTD_ONENAND_VERIFY_WRITE=y
# CONFIG_MTD_ONENAND_OTP is not set
# CONFIG_MTD_ONENAND_2X_PROGRAM is not set
# CONFIG_MTD_ONENAND_SIM is not set
#
# UBI - Unsorted block images
#
# CONFIG_MTD_UBI is not set
CONFIG_PARPORT=y
# CONFIG_PARPORT_PC is not set
# CONFIG_PARPORT_GSC is not set
CONFIG_PARPORT_AX88796=y
# CONFIG_PARPORT_1284 is not set
CONFIG_PARPORT_NOT_PC=y
# CONFIG_BLK_DEV is not set
# CONFIG_MISC_DEVICES is not set
CONFIG_HAVE_IDE=y
CONFIG_IDE=y
CONFIG_IDE_MAX_HWIFS=4
# CONFIG_BLK_DEV_IDE is not set
# CONFIG_BLK_DEV_HD_ONLY is not set
# CONFIG_BLK_DEV_HD is not set
#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
CONFIG_SCSI_TGT=y
CONFIG_SCSI_NETLINK=y
#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
# CONFIG_BLK_DEV_SR is not set
# CONFIG_CHR_DEV_SG is not set
# CONFIG_CHR_DEV_SCH is not set
#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
# CONFIG_SCSI_LOGGING is not set
CONFIG_SCSI_SCAN_ASYNC=y
#
# SCSI Transports
#
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_FC_TGT_ATTRS=y
CONFIG_SCSI_ISCSI_ATTRS=y
CONFIG_SCSI_SRP_ATTRS=y
CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
CONFIG_SCSI_3W_9XXX=y
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
CONFIG_SCSI_DPT_I2O=y
CONFIG_SCSI_ADVANSYS=y
CONFIG_SCSI_ARCMSR=y
CONFIG_MEGARAID_NEWGEN=y
# CONFIG_MEGARAID_MM is not set
CONFIG_MEGARAID_LEGACY=y
# CONFIG_MEGARAID_SAS is not set
CONFIG_SCSI_HPTIOP=y
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
CONFIG_SCSI_GDTH=y
CONFIG_SCSI_IPS=y
# CONFIG_SCSI_INITIO is not set
CONFIG_SCSI_INIA100=y
CONFIG_SCSI_STEX=y
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
# CONFIG_SCSI_SYM53C8XX_MMIO is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
CONFIG_SCSI_QLA_FC=y
CONFIG_SCSI_QLA_ISCSI=y
CONFIG_SCSI_LPFC=y
CONFIG_SCSI_DC395x=y
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_DEBUG is not set
CONFIG_SCSI_SRP=y
# CONFIG_ATA is not set
# CONFIG_MD is not set
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
CONFIG_FUSION_FC=y
# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
CONFIG_FUSION_CTL=y
# CONFIG_FUSION_LOGGING is not set
#
# IEEE 1394 (FireWire) support
#
CONFIG_FIREWIRE=y
CONFIG_FIREWIRE_OHCI=y
CONFIG_FIREWIRE_OHCI_DEBUG=y
# CONFIG_FIREWIRE_SBP2 is not set
CONFIG_IEEE1394=y
#
# Subsystem Options
#
CONFIG_IEEE1394_VERBOSEDEBUG=y
#
# Controllers
#
#
# Texas Instruments PCILynx requires I2C
#
# CONFIG_IEEE1394_OHCI1394 is not set
#
# Protocols
#
CONFIG_IEEE1394_SBP2=y
CONFIG_IEEE1394_SBP2_PHYS_DMA=y
# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
# CONFIG_IEEE1394_RAWIO is not set
CONFIG_I2O=y
# CONFIG_I2O_LCT_NOTIFY_ON_CHANGES is not set
CONFIG_I2O_EXT_ADAPTEC=y
# CONFIG_I2O_EXT_ADAPTEC_DMA64 is not set
# CONFIG_I2O_CONFIG is not set
CONFIG_I2O_BUS=y
CONFIG_I2O_BLOCK=y
CONFIG_I2O_SCSI=y
CONFIG_I2O_PROC=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
# CONFIG_NETDEVICES is not set
CONFIG_MLX4_CORE=y
# CONFIG_ISDN is not set
CONFIG_PHONE=y
# CONFIG_PHONE_IXJ is not set
#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_FF_MEMLESS=y
CONFIG_INPUT_POLLDEV=y
#
# Userland interfaces
#
# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set
#
# Input Device Drivers
#
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
CONFIG_JOYSTICK_A3D=y
# CONFIG_JOYSTICK_ADI is not set
# CONFIG_JOYSTICK_COBRA is not set
# CONFIG_JOYSTICK_GF2K is not set
# CONFIG_JOYSTICK_GRIP is not set
CONFIG_JOYSTICK_GRIP_MP=y
CONFIG_JOYSTICK_GUILLEMOT=y
CONFIG_JOYSTICK_INTERACT=y
CONFIG_JOYSTICK_SIDEWINDER=y
# CONFIG_JOYSTICK_TMDC is not set
CONFIG_JOYSTICK_IFORCE=y
# CONFIG_JOYSTICK_IFORCE_232 is not set
# CONFIG_JOYSTICK_WARRIOR is not set
# CONFIG_JOYSTICK_MAGELLAN is not set
# CONFIG_JOYSTICK_SPACEORB is not set
# CONFIG_JOYSTICK_SPACEBALL is not set
# CONFIG_JOYSTICK_STINGER is not set
CONFIG_JOYSTICK_TWIDJOY=y
CONFIG_JOYSTICK_ZHENHUA=y
CONFIG_JOYSTICK_DB9=y
CONFIG_JOYSTICK_GAMECON=y
CONFIG_JOYSTICK_TURBOGRAFX=y
CONFIG_JOYSTICK_JOYDUMP=y
CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_ADS7846 is not set
CONFIG_TOUCHSCREEN_FUJITSU=y
CONFIG_TOUCHSCREEN_GUNZE=y
CONFIG_TOUCHSCREEN_ELO=y
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
CONFIG_TOUCHSCREEN_TOUCHRIGHT=y
CONFIG_TOUCHSCREEN_TOUCHWIN=y
# CONFIG_TOUCHSCREEN_UCB1400 is not set
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=y
# CONFIG_INPUT_UINPUT is not set
#
# Hardware I/O ports
#
CONFIG_SERIO=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
# CONFIG_SERIO_CT82C710 is not set
CONFIG_SERIO_PARKBD=y
CONFIG_SERIO_PCIPS2=y
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
CONFIG_GAMEPORT=y
CONFIG_GAMEPORT_NS558=y
# CONFIG_GAMEPORT_L4 is not set
# CONFIG_GAMEPORT_EMU10K1 is not set
CONFIG_GAMEPORT_FM801=y
#
# Character devices
#
# CONFIG_VT is not set
# CONFIG_DEVKMEM is not set
# CONFIG_SERIAL_NONSTANDARD is not set
CONFIG_NOZOMI=y
#
# Serial drivers
#
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_CONSOLE is not set
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set
#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_CONSOLE_POLL=y
CONFIG_SERIAL_JSM=y
# CONFIG_UNIX98_PTYS is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_PRINTER is not set
CONFIG_PPDEV=y
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
# CONFIG_NVRAM is not set
# CONFIG_R3964 is not set
CONFIG_APPLICOM=y
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=256
# CONFIG_HANGCHECK_TIMER is not set
CONFIG_TCG_TPM=y
# CONFIG_TCG_NSC is not set
CONFIG_TCG_ATMEL=y
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
# CONFIG_I2C is not set
CONFIG_SPI=y
CONFIG_SPI_DEBUG=y
CONFIG_SPI_MASTER=y
#
# SPI Master Controller Drivers
#
CONFIG_SPI_BITBANG=y
# CONFIG_SPI_BUTTERFLY is not set
CONFIG_SPI_LM70_LLP=y
#
# SPI Protocol Masters
#
CONFIG_SPI_SPIDEV=y
CONFIG_W1=y
#
# 1-wire Bus Masters
#
CONFIG_W1_MASTER_MATROX=y
#
# 1-wire Slaves
#
# CONFIG_W1_SLAVE_THERM is not set
CONFIG_W1_SLAVE_SMEM=y
# CONFIG_W1_SLAVE_DS2433 is not set
CONFIG_W1_SLAVE_DS2760=y
# CONFIG_POWER_SUPPLY is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
# CONFIG_WATCHDOG_NOWAYOUT is not set
#
# Watchdog Device Drivers
#
CONFIG_SOFT_WATCHDOG=y
CONFIG_ACQUIRE_WDT=y
# CONFIG_ADVANTECH_WDT is not set
CONFIG_ALIM1535_WDT=y
CONFIG_ALIM7101_WDT=y
# CONFIG_SC520_WDT is not set
# CONFIG_EUROTECH_WDT is not set
CONFIG_IB700_WDT=y
CONFIG_IBMASR=y
CONFIG_WAFER_WDT=y
# CONFIG_I6300ESB_WDT is not set
CONFIG_ITCO_WDT=y
CONFIG_ITCO_VENDOR_SUPPORT=y
# CONFIG_IT8712F_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
CONFIG_60XX_WDT=y
# CONFIG_SBC8360_WDT is not set
CONFIG_CPU5_WDT=y
CONFIG_SMSC37B787_WDT=y
# CONFIG_W83627HF_WDT is not set
CONFIG_W83697HF_WDT=y
# CONFIG_W83877F_WDT is not set
CONFIG_W83977F_WDT=y
CONFIG_MACHZ_WDT=y
CONFIG_SBC_EPX_C3_WATCHDOG=y
#
# PCI-based Watchdog Cards
#
CONFIG_PCIPCWATCHDOG=y
CONFIG_WDTPCI=y
CONFIG_WDT_501_PCI=y
#
# Sonics Silicon Backplane
#
CONFIG_SSB_POSSIBLE=y
CONFIG_SSB=y
CONFIG_SSB_SPROM=y
CONFIG_SSB_PCIHOST_POSSIBLE=y
CONFIG_SSB_PCIHOST=y
# CONFIG_SSB_B43_PCI_BRIDGE is not set
CONFIG_SSB_SILENT=y
CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y
CONFIG_SSB_DRIVER_PCICORE=y
#
# Multifunction device drivers
#
# CONFIG_MFD_SM501 is not set
CONFIG_HTC_PASIC3=y
#
# Multimedia devices
#
#
# Multimedia core support
#
# CONFIG_VIDEO_DEV is not set
# CONFIG_VIDEO_MEDIA is not set
#
# Multimedia drivers
#
# CONFIG_DAB is not set
#
# Graphics support
#
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
# CONFIG_AGP_SIS is not set
CONFIG_AGP_VIA=y
CONFIG_DRM=y
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_R128 is not set
# CONFIG_DRM_RADEON is not set
# CONFIG_DRM_I810 is not set
# CONFIG_DRM_I830 is not set
# CONFIG_DRM_I915 is not set
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
CONFIG_DRM_VIA=y
CONFIG_DRM_SAVAGE=y
# CONFIG_VGASTATE is not set
CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB_DDC is not set
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
CONFIG_FB_SYS_FILLRECT=y
CONFIG_FB_SYS_COPYAREA=y
CONFIG_FB_SYS_IMAGEBLIT=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
CONFIG_FB_SYS_FOPS=y
CONFIG_FB_DEFERRED_IO=y
CONFIG_FB_HECUBA=y
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
CONFIG_FB_BACKLIGHT=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
#
# Frame buffer hardware drivers
#
CONFIG_FB_CIRRUS=y
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
CONFIG_FB_ARC=y
CONFIG_FB_ASILIANT=y
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_VESA is not set
# CONFIG_FB_EFI is not set
CONFIG_FB_N411=y
CONFIG_FB_HGA=y
# CONFIG_FB_HGA_ACCEL is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_LE80578 is not set
CONFIG_FB_INTEL=y
# CONFIG_FB_INTEL_DEBUG is not set
# CONFIG_FB_INTEL_I2C is not set
# CONFIG_FB_MATROX is not set
CONFIG_FB_RADEON=y
# CONFIG_FB_RADEON_I2C is not set
# CONFIG_FB_RADEON_BACKLIGHT is not set
CONFIG_FB_RADEON_DEBUG=y
# CONFIG_FB_ATY128 is not set
CONFIG_FB_ATY=y
# CONFIG_FB_ATY_CT is not set
CONFIG_FB_ATY_GX=y
CONFIG_FB_ATY_BACKLIGHT=y
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
CONFIG_FB_SIS=y
CONFIG_FB_SIS_300=y
CONFIG_FB_SIS_315=y
# CONFIG_FB_NEOMAGIC is not set
CONFIG_FB_KYRO=y
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
CONFIG_FB_TRIDENT=y
CONFIG_FB_TRIDENT_ACCEL=y
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_GEODE is not set
CONFIG_FB_VIRTUAL=y
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_CORGI is not set
# CONFIG_BACKLIGHT_PROGEAR is not set
#
# Display device support
#
CONFIG_DISPLAY_SUPPORT=y
#
# Display hardware drivers
#
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
# CONFIG_LOGO_LINUX_CLUT224 is not set
#
# Sound
#
# CONFIG_SOUND is not set
CONFIG_HID_SUPPORT=y
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
# CONFIG_NEW_LEDS is not set
# CONFIG_ACCESSIBILITY is not set
CONFIG_INFINIBAND=y
CONFIG_INFINIBAND_USER_MAD=y
CONFIG_INFINIBAND_USER_ACCESS=y
CONFIG_INFINIBAND_USER_MEM=y
# CONFIG_INFINIBAND_MTHCA is not set
CONFIG_INFINIBAND_IPATH=y
CONFIG_MLX4_INFINIBAND=y
# CONFIG_INFINIBAND_SRP is not set
# CONFIG_EDAC is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_HCTOSYS=y
CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
# CONFIG_RTC_DEBUG is not set
#
# RTC interfaces
#
# CONFIG_RTC_INTF_DEV is not set
CONFIG_RTC_DRV_TEST=y
#
# SPI RTC drivers
#
# CONFIG_RTC_DRV_MAX6902 is not set
# CONFIG_RTC_DRV_R9701 is not set
# CONFIG_RTC_DRV_RS5C348 is not set
#
# Platform RTC drivers
#
# CONFIG_RTC_DRV_CMOS is not set
CONFIG_RTC_DRV_DS1511=y
# CONFIG_RTC_DRV_DS1553 is not set
CONFIG_RTC_DRV_DS1742=y
# CONFIG_RTC_DRV_STK17TA8 is not set
CONFIG_RTC_DRV_M48T86=y
# CONFIG_RTC_DRV_M48T59 is not set
CONFIG_RTC_DRV_V3020=y
#
# on-CPU RTC drivers
#
CONFIG_DMADEVICES=y
#
# DMA Devices
#
# CONFIG_INTEL_IOATDMA is not set
CONFIG_AUXDISPLAY=y
CONFIG_UIO=y
# CONFIG_UIO_CIF is not set
CONFIG_UIO_SMX=y
#
# Firmware Drivers
#
# CONFIG_EDD is not set
# CONFIG_DELL_RBU is not set
CONFIG_DCDBAS=y
CONFIG_ISCSI_IBFT_FIND=y
# CONFIG_ISCSI_IBFT is not set
#
# File systems
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
# CONFIG_EXT3_FS_POSIX_ACL is not set
# CONFIG_EXT3_FS_SECURITY is not set
# CONFIG_EXT4DEV_FS is not set
CONFIG_JBD=y
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=y
# CONFIG_XFS_QUOTA is not set
# CONFIG_XFS_POSIX_ACL is not set
CONFIG_XFS_RT=y
# CONFIG_XFS_DEBUG is not set
CONFIG_GFS2_FS=y
# CONFIG_GFS2_FS_LOCKING_NOLOCK is not set
CONFIG_DNOTIFY=y
# CONFIG_INOTIFY is not set
# CONFIG_QUOTA is not set
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
CONFIG_FUSE_FS=y
#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set
#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
CONFIG_NTFS_FS=y
# CONFIG_NTFS_DEBUG is not set
CONFIG_NTFS_RW=y
#
# Pseudo filesystems
#
# CONFIG_PROC_FS is not set
# CONFIG_SYSFS is not set
# CONFIG_TMPFS is not set
# CONFIG_HUGETLBFS is not set
# CONFIG_HUGETLB_PAGE is not set
#
# Miscellaneous filesystems
#
CONFIG_ADFS_FS=y
# CONFIG_ADFS_FS_RW is not set
CONFIG_AFFS_FS=y
# CONFIG_ECRYPT_FS is not set
CONFIG_HFS_FS=y
CONFIG_HFSPLUS_FS=y
CONFIG_BEFS_FS=y
CONFIG_BEFS_DEBUG=y
CONFIG_BFS_FS=y
CONFIG_EFS_FS=y
CONFIG_JFFS2_FS=y
CONFIG_JFFS2_FS_DEBUG=0
CONFIG_JFFS2_FS_WRITEBUFFER=y
# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
# CONFIG_JFFS2_SUMMARY is not set
CONFIG_JFFS2_FS_XATTR=y
# CONFIG_JFFS2_FS_POSIX_ACL is not set
CONFIG_JFFS2_FS_SECURITY=y
CONFIG_JFFS2_COMPRESSION_OPTIONS=y
# CONFIG_JFFS2_ZLIB is not set
CONFIG_JFFS2_LZO=y
# CONFIG_JFFS2_RTIME is not set
CONFIG_JFFS2_RUBIN=y
# CONFIG_JFFS2_CMODE_NONE is not set
# CONFIG_JFFS2_CMODE_PRIORITY is not set
CONFIG_JFFS2_CMODE_SIZE=y
# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
CONFIG_CRAMFS=y
CONFIG_VXFS_FS=y
# CONFIG_MINIX_FS is not set
CONFIG_HPFS_FS=y
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
CONFIG_UFS_FS=y
CONFIG_UFS_FS_WRITE=y
# CONFIG_UFS_DEBUG is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_AMIGA_PARTITION=y
CONFIG_MSDOS_PARTITION=y
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="iso8859-1"
# CONFIG_NLS_CODEPAGE_437 is not set
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
CONFIG_NLS_CODEPAGE_850=y
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
CONFIG_NLS_CODEPAGE_860=y
CONFIG_NLS_CODEPAGE_861=y
# CONFIG_NLS_CODEPAGE_862 is not set
CONFIG_NLS_CODEPAGE_863=y
CONFIG_NLS_CODEPAGE_864=y
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
CONFIG_NLS_CODEPAGE_950=y
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
CONFIG_NLS_CODEPAGE_874=y
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_CODEPAGE_1250=y
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_2=y
CONFIG_NLS_ISO8859_3=y
# CONFIG_NLS_ISO8859_4 is not set
CONFIG_NLS_ISO8859_5=y
CONFIG_NLS_ISO8859_6=y
# CONFIG_NLS_ISO8859_7 is not set
CONFIG_NLS_ISO8859_9=y
CONFIG_NLS_ISO8859_13=y
CONFIG_NLS_ISO8859_14=y
# CONFIG_NLS_ISO8859_15 is not set
CONFIG_NLS_KOI8_R=y
CONFIG_NLS_KOI8_U=y
CONFIG_NLS_UTF8=y
#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_ENABLE_WARN_DEPRECATED=y
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
# CONFIG_HEADERS_CHECK is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_SOFTLOCKUP=y
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1
CONFIG_SCHED_DEBUG=y
CONFIG_SCHEDSTATS=y
# CONFIG_DEBUG_OBJECTS is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
# CONFIG_PROVE_LOCKING is not set
CONFIG_LOCKDEP=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_STACKTRACE=y
CONFIG_DEBUG_KOBJECT=y
CONFIG_DEBUG_VM=y
# CONFIG_DEBUG_WRITECOUNT is not set
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_FRAME_POINTER=y
# CONFIG_BACKTRACE_SELF_TEST is not set
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
# CONFIG_FAIL_PAGE_ALLOC is not set
CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_LATENCYTOP=y
CONFIG_HAVE_FTRACE=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
# CONFIG_FTRACE is not set
# CONFIG_IRQSOFF_TRACER is not set
# CONFIG_SYSPROF_TRACER is not set
# CONFIG_SCHED_TRACER is not set
# CONFIG_CONTEXT_SWITCH_TRACER is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
# CONFIG_FIREWIRE_OHCI_REMOTE_DMA is not set
CONFIG_SAMPLES=y
CONFIG_SAMPLE_KOBJECT=y
CONFIG_HAVE_ARCH_KGDB=y
CONFIG_KGDB=y
CONFIG_KGDB_SERIAL_CONSOLE=y
CONFIG_KGDB_TESTS=y
# CONFIG_KGDB_TESTS_ON_BOOT is not set
CONFIG_NONPROMISC_DEVMEM=y
# CONFIG_EARLY_PRINTK is not set
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_DEBUG_STACK_USAGE is not set
CONFIG_DEBUG_PAGEALLOC=y
# CONFIG_DEBUG_PER_CPU_MAPS is not set
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
CONFIG_DIRECT_GBPAGES=y
CONFIG_DEBUG_RODATA_TEST=y
CONFIG_X86_MPPARSE=y
# CONFIG_IOMMU_DEBUG is not set
# CONFIG_MMIOTRACE is not set
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
CONFIG_IO_DELAY_TYPE_NONE=3
# CONFIG_IO_DELAY_0X80 is not set
# CONFIG_IO_DELAY_0XED is not set
CONFIG_IO_DELAY_UDELAY=y
# CONFIG_IO_DELAY_NONE is not set
CONFIG_DEFAULT_IO_DELAY_TYPE=2
CONFIG_CPA_DEBUG=y
#
# Security options
#
CONFIG_KEYS=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY_FILE_CAPABILITIES=y
CONFIG_CRYPTO=y
#
# Crypto core or helper
#
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_AEAD=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_GF128MUL=y
CONFIG_CRYPTO_NULL=y
# CONFIG_CRYPTO_CRYPTD is not set
# CONFIG_CRYPTO_AUTHENC is not set
#
# Authenticated Encryption with Associated Data
#
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_SEQIV=y
#
# Block modes
#
# CONFIG_CRYPTO_CBC is not set
CONFIG_CRYPTO_CTR=y
CONFIG_CRYPTO_CTS=y
CONFIG_CRYPTO_ECB=y
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_PCBC is not set
# CONFIG_CRYPTO_XTS is not set
#
# Hash modes
#
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_XCBC is not set
#
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_MD4 is not set
# CONFIG_CRYPTO_MD5 is not set
CONFIG_CRYPTO_MICHAEL_MIC=y
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
CONFIG_CRYPTO_SHA512=y
# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_WP512=y
#
# Ciphers
#
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_X86_64=y
CONFIG_CRYPTO_ANUBIS=y
CONFIG_CRYPTO_ARC4=y
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_CAMELLIA is not set
CONFIG_CRYPTO_CAST5=y
CONFIG_CRYPTO_CAST6=y
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_FCRYPT is not set
CONFIG_CRYPTO_KHAZAD=y
CONFIG_CRYPTO_SALSA20=y
CONFIG_CRYPTO_SALSA20_X86_64=y
CONFIG_CRYPTO_SEED=y
CONFIG_CRYPTO_SERPENT=y
# CONFIG_CRYPTO_TEA is not set
CONFIG_CRYPTO_TWOFISH=y
CONFIG_CRYPTO_TWOFISH_COMMON=y
# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
#
# Compression
#
# CONFIG_CRYPTO_DEFLATE is not set
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_HW is not set
CONFIG_HAVE_KVM=y
# CONFIG_VIRTUALIZATION is not set
#
# Library routines
#
CONFIG_BITREVERSE=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC_ITU_T=y
CONFIG_CRC32=y
CONFIG_CRC7=y
CONFIG_LIBCRC32C=y
CONFIG_ZLIB_INFLATE=y
CONFIG_LZO_COMPRESS=y
CONFIG_LZO_DECOMPRESS=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
CONFIG_FORCE_SUCCESSFUL_BUILD=y
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-09 13:03 ` Ingo Molnar
@ 2008-06-09 16:08 ` Mike Travis
2008-06-09 17:36 ` Mike Travis
1 sibling, 0 replies; 119+ messages in thread
From: Mike Travis @ 2008-06-09 16:08 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> * It is now possible to use percpu operations for pda access
>> since the pda is in the percpu area. Drop the pda operations.
>
> FYI, this one didnt build with the attached config.
>
> Ingo
>
Ok, thanks, I will check it out.
I'm still having problems tracking down your previous "instantaneous
reboot" problem. It has something to do with the cpu_clock
interrupt, but I still haven't figured out why it's failing.
Thanks,
Mike
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-09 13:03 ` Ingo Molnar
2008-06-09 16:08 ` Mike Travis
@ 2008-06-09 17:36 ` Mike Travis
2008-06-09 18:20 ` Christoph Lameter
2008-06-10 10:09 ` Ingo Molnar
1 sibling, 2 replies; 119+ messages in thread
From: Mike Travis @ 2008-06-09 17:36 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> * It is now possible to use percpu operations for pda access
>> since the pda is in the percpu area. Drop the pda operations.
>
> FYI, this one didnt build with the attached config.
>
> Ingo
>
Hi Ingo,
Can you send me the output from the build? It builds fine on my
machine (a few warnings). The silentoldconfig made these changes
to the .config file. (I did a git-remote update and reapplied my
changes before building.)
Thanks,
Mike
--- ../configs/ingo-test-9 2008-06-09 10:25:32.148026511 -0700
+++ ../build/ingo-test-9/.config 2008-06-09 10:30:15.273171501 -0700
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.26-rc5
-# Mon Jun 9 14:59:39 2008
+# Linux kernel version: 2.6.26-rc4
+# Mon Jun 9 10:30:10 2008
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
@@ -36,6 +36,7 @@
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
+CONFIG_HAVE_ZERO_BASED_PER_CPU=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ZONE_DMA32=y
@@ -52,23 +53,16 @@
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
# CONFIG_KTIME_SCALAR is not set
-# CONFIG_BOOTPARAM_SUPPORT_WANTED is not set
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# General setup
#
CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_BOOT_ALLOWED3=y
-CONFIG_BROKEN_BOOT_ALLOWED2=y
-CONFIG_BROKEN_BOOT_ALLOWED=y
-CONFIG_BROKEN_BOOT=y
-CONFIG_BROKEN_BOOT_EUROPE=y
-CONFIG_BROKEN_BOOT_TITAN=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
-# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_LOCALVERSION="-ingo-test-9"
+CONFIG_LOCALVERSION_AUTO=y
# CONFIG_SWAP is not set
# CONFIG_SYSVIPC is not set
# CONFIG_POSIX_MQUEUE is not set
@@ -152,15 +146,16 @@
# CONFIG_NO_HZ is not set
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_SMP_SUPPORT=y
-CONFIG_UP_WANTED_1=y
-# CONFIG_UP_WANTED_2 is not set
CONFIG_SMP=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
+# CONFIG_X86_ES7000 is not set
# CONFIG_X86_RDC321X is not set
# CONFIG_X86_VSMP is not set
CONFIG_PARAVIRT_GUEST=y
@@ -616,6 +611,7 @@
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_FC_TGT_ATTRS=y
CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_SAS_LIBSAS is not set
CONFIG_SCSI_SRP_ATTRS=y
CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_SCSI_LOWLEVEL=y
@@ -626,6 +622,7 @@
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC94XX is not set
CONFIG_SCSI_DPT_I2O=y
CONFIG_SCSI_ADVANSYS=y
CONFIG_SCSI_ARCMSR=y
@@ -642,6 +639,7 @@
CONFIG_SCSI_IPS=y
# CONFIG_SCSI_INITIO is not set
CONFIG_SCSI_INIA100=y
+# CONFIG_SCSI_MVSAS is not set
CONFIG_SCSI_STEX=y
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
@@ -1302,6 +1300,7 @@
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_STACKTRACE=y
CONFIG_DEBUG_KOBJECT=y
+# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_VM=y
# CONFIG_DEBUG_WRITECOUNT is not set
CONFIG_DEBUG_LIST=y
@@ -1460,4 +1459,3 @@
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
-CONFIG_FORCE_SUCCESSFUL_BUILD=y
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-09 17:36 ` Mike Travis
@ 2008-06-09 18:20 ` Christoph Lameter
2008-06-09 23:29 ` Jeremy Fitzhardinge
2008-06-10 10:09 ` Ingo Molnar
1 sibling, 1 reply; 119+ messages in thread
From: Christoph Lameter @ 2008-06-09 18:20 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
Paravirt support is on. We have seen an issue with that in the past. Why
was that again?
Also check that there is really no use of the segment register before
start_kernel() loads it. If the segment register is used to refer to pda
stuff before start_kernel then we need to make sure that the right value
is loaded in the asm (head64.c).
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-09 18:20 ` Christoph Lameter
@ 2008-06-09 23:29 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-09 23:29 UTC (permalink / raw)
To: Christoph Lameter
Cc: Mike Travis, Ingo Molnar, Andrew Morton, David Miller,
Eric Dumazet, linux-kernel
Christoph Lameter wrote:
> Paravirt support is on. We have seen an issue with that in the past. Why
> was that again?
>
I'm not aware of any paravirt-related percpu bugs (other than the fact
that the last major revision of per-cpu variables was done under the
aegis of paravirt-ops).
But booting on bare hardware with pvops enabled should be exactly the
same as non-pvops with respect to percpu.
J
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-09 17:36 ` Mike Travis
2008-06-09 18:20 ` Christoph Lameter
@ 2008-06-10 10:09 ` Ingo Molnar
2008-06-10 15:07 ` Mike Travis
1 sibling, 1 reply; 119+ messages in thread
From: Ingo Molnar @ 2008-06-10 10:09 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
* Mike Travis <travis@sgi.com> wrote:
> Ingo Molnar wrote:
> > * Mike Travis <travis@sgi.com> wrote:
> >
> >> * It is now possible to use percpu operations for pda access
> >> since the pda is in the percpu area. Drop the pda operations.
> >
> > FYI, this one didnt build with the attached config.
> >
> > Ingo
> >
>
> Hi Ingo,
>
> Can you send me the output from the build? It builds fine on my
> machine (a few warnings). The silentoldconfig made these changes to
> the .config file. (I did a git-remote update and reapplied my changes
> before building.)
dont have the log anymore, but i had to revert 3/4 due to the boot
crash, could there be a dependency of 4/4 on 3/4?
the first two patches look fine and survived -tip testing.
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu().
2008-06-10 10:09 ` Ingo Molnar
@ 2008-06-10 15:07 ` Mike Travis
0 siblings, 0 replies; 119+ messages in thread
From: Mike Travis @ 2008-06-10 15:07 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Christoph Lameter, David Miller, Eric Dumazet,
Jeremy Fitzhardinge, linux-kernel
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> Ingo Molnar wrote:
>>> * Mike Travis <travis@sgi.com> wrote:
>>>
>>>> * It is now possible to use percpu operations for pda access
>>>> since the pda is in the percpu area. Drop the pda operations.
>>> FYI, this one didnt build with the attached config.
>>>
>>> Ingo
>>>
>> Hi Ingo,
>>
>> Can you send me the output from the build? It builds fine on my
>> machine (a few warnings). The silentoldconfig made these changes to
>> the .config file. (I did a git-remote update and reapplied my changes
>> before building.)
>
> dont have the log anymore, but i had to revert 3/4 due to the boot
> crash, could there be a dependency of 4/4 on 3/4?
>
> the first two patches look fine and survived -tip testing.
>
> Ingo
Thanks Ingo. I'm narrowing down the reboot problem by pulling config
items out. I managed to get it to boot but now am trying to understand
how those config options are causing the problem.
Btw, it does not panic/reboot on linux-next with the original config.
Perhaps I should try and bisect to an earlier patch to see what that
reveals?
Thanks,
Mike
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH] x86: collapse the various size-dependent percpu accessors together
2008-06-04 0:30 [PATCH 0/4] percpu: Optimize percpu accesses Mike Travis
` (3 preceding siblings ...)
2008-06-04 0:30 ` [PATCH 4/4] x86: Replace xxx_pda() operations with x86_xx_percpu() Mike Travis
@ 2008-06-04 10:18 ` Jeremy Fitzhardinge
2008-06-04 10:45 ` Jeremy Fitzhardinge
4 siblings, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-04 10:18 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel
We can use gcc's %z modifier to emit the appropriate size suffix for
an instruction, so we don't need to duplicate the asm statement for
each size.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
include/asm-x86/percpu.h | 56 +++-------------------------------------------
1 file changed, 4 insertions(+), 52 deletions(-)
===================================================================
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -75,22 +75,10 @@
} \
switch (sizeof(var)) { \
case 1: \
- asm(op "b %1,"__percpu_seg"%0" \
- : "+m" (var) \
- : "ri" ((T__)val)); \
- break; \
case 2: \
- asm(op "w %1,"__percpu_seg"%0" \
- : "+m" (var) \
- : "ri" ((T__)val)); \
- break; \
case 4: \
- asm(op "l %1,"__percpu_seg"%0" \
- : "+m" (var) \
- : "ri" ((T__)val)); \
- break; \
case 8: \
- asm(op "q %1,"__percpu_seg"%0" \
+ asm(op "%z0 %1,"__percpu_seg"%0" \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
@@ -103,22 +91,10 @@
typeof(var) ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_seg"%1,%0" \
- : "=r" (ret__) \
- : "m" (var)); \
- break; \
case 2: \
- asm(op "w "__percpu_seg"%1,%0" \
- : "=r" (ret__) \
- : "m" (var)); \
- break; \
case 4: \
- asm(op "l "__percpu_seg"%1,%0" \
- : "=r" (ret__) \
- : "m" (var)); \
- break; \
case 8: \
- asm(op "q "__percpu_seg"%1,%0" \
+ asm(op "%z1 "__percpu_seg"%1,%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
@@ -131,19 +107,10 @@
({ \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_seg"%0" \
- : : "m"(var)); \
- break; \
case 2: \
- asm(op "w "__percpu_seg"%0" \
- : : "m"(var)); \
- break; \
case 4: \
- asm(op "l "__percpu_seg"%0" \
- : : "m"(var)); \
- break; \
case 8: \
- asm(op "q "__percpu_seg"%0" \
+ asm(op "%z0 "__percpu_seg"%0" \
: : "m"(var)); \
break; \
default: __bad_percpu_size(); \
@@ -155,25 +122,10 @@
typeof(var) prev; \
switch (sizeof(var)) { \
case 1: \
- asm("cmpxchgb %b1, "__percpu_seg"%2" \
- : "=a"(prev) \
- : "q"(new), "m"(var), "0"(old) \
- : "memory"); \
- break; \
case 2: \
- asm("cmpxchgw %w1, "__percpu_seg"%2" \
- : "=a"(prev) \
- : "r"(new), "m"(var), "0"(old) \
- : "memory"); \
- break; \
case 4: \
- asm("cmpxchgl %k1, "__percpu_seg"%2" \
- : "=a"(prev) \
- : "r"(new), "m"(var), "0"(old) \
- : "memory"); \
- break; \
case 8: \
- asm("cmpxchgq %1, "__percpu_seg"%2" \
+ asm("cmpxchg%z1 %1, "__percpu_seg"%2" \
: "=a"(prev) \
: "r"(new), "m"(var), "0"(old) \
: "memory"); \
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH] x86: collapse the various size-dependent percpu accessors together
2008-06-04 10:18 ` [PATCH] x86: collapse the various size-dependent percpu accessors together Jeremy Fitzhardinge
@ 2008-06-04 10:45 ` Jeremy Fitzhardinge
2008-06-04 11:29 ` Ingo Molnar
0 siblings, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-04 10:45 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel
Jeremy Fitzhardinge wrote:
> We can use gcc's %z modifier to emit the appropriate size suffix for
> an instruction, so we don't need to duplicate the asm statement for
> each size.
Nah, it's a disaster. Drop this one.
J
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH] x86: collapse the various size-dependent percpu accessors together
2008-06-04 10:45 ` Jeremy Fitzhardinge
@ 2008-06-04 11:29 ` Ingo Molnar
2008-06-04 12:09 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 119+ messages in thread
From: Ingo Molnar @ 2008-06-04 11:29 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Mike Travis, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel
* Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Jeremy Fitzhardinge wrote:
>> We can use gcc's %z modifier to emit the appropriate size suffix for
>> an instruction, so we don't need to duplicate the asm statement for
>> each size.
>
> Nah, it's a disaster. Drop this one.
hm, what's the problem with it? What you are trying to do here looks
like a nice cleanup - assuming it results in the same instructions
emitted ;-)
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH] x86: collapse the various size-dependent percpu accessors together
2008-06-04 11:29 ` Ingo Molnar
@ 2008-06-04 12:09 ` Jeremy Fitzhardinge
2008-06-10 17:21 ` Christoph Lameter
0 siblings, 1 reply; 119+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-04 12:09 UTC (permalink / raw)
To: Ingo Molnar
Cc: Mike Travis, Andrew Morton, Christoph Lameter, David Miller,
Eric Dumazet, linux-kernel
Ingo Molnar wrote:
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>
>> Jeremy Fitzhardinge wrote:
>>
>>> We can use gcc's %z modifier to emit the appropriate size suffix for
>>> an instruction, so we don't need to duplicate the asm statement for
>>> each size.
>>>
>> Nah, it's a disaster. Drop this one.
>>
>
> hm, what's the problem with it? What you are trying to do here looks
> like a nice cleanup - assuming it results in the same instructions
> emitted ;-)
Yes, would have been lovely. But gcc emits junk:
CC arch/x86/xen/enlighten.o
{standard input}: Assembler messages:
{standard input}:637: Error: no such instruction: `movll %gs:per_cpu__xen_vcpu(%rip),%rax'
{standard input}:655: Error: no such instruction: `movll %gs:per_cpu__xen_vcpu(%rip),%rax'
{standard input}:671: Error: no such instruction: `movll %gs:per_cpu__xen_vcpu(%rip),%rax'
{standard input}:682: Error: no such instruction: `movll %gs:per_cpu__xen_vcpu(%rip),%rax'
{standard input}:783: Error: no such instruction: `movll %gs:per_cpu__pda+8(%rip),%rbx'
{standard input}:834: Error: no such instruction: `movll %gs:per_cpu__xen_mc_irq_flags(%rip),%rdi'
{standard input}:901: Error: no such instruction: `movll %gs:per_cpu__pda+8(%rip),%rbx'
{standard input}:978: Error: no such instruction: `movll %gs:per_cpu__xen_mc_irq_flags(%rip),%rdi'
{standard input}:1064: Error: no such instruction: `movll %gs:per_cpu__pda+8(%rip),%rbx'
{standard input}:1110: Error: no such instruction: `movll %gs:per_cpu__xen_mc_irq_flags(%rip),%rdi'
...
CC arch/x86/vdso/vclock_gettime.o
{standard input}: Assembler messages:
{standard input}:75: Error: suffix or operands invalid for `movs'
(all over the place)
I tried a version to do 64-bit accesses with an explicit "movq" to solve
the "movll" problem, but it generates "movs" on occasion and that was
the point I gave up.
J
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
@ 2008-07-25 21:11 ` Mike Travis
0 siblings, 0 replies; 119+ messages in thread
From: Mike Travis @ 2008-07-25 21:11 UTC (permalink / raw)
To: Ingo Molnar, Andrew Morton
Cc: Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel,
Christoph Lameter
[-- Attachment #1: fold_pda_into_percpu --]
[-- Type: text/plain, Size: 13594 bytes --]
WARNING: there are two FIXME's in arch/x86/xen/enlighten.c
and arch/x86/xen/smp.c that I'm not sure how to handle...?
* Declare the pda as a per cpu variable.
* Relocate the initial pda in head_64.S for the boot cpu (0).
For secondary cpus, do_boot_cpu() sets up the correct initial pda.
Based on linux-2.6.tip/master
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/kernel/cpu/common_64.c | 4 -
arch/x86/kernel/head64.c | 29 +-----------
arch/x86/kernel/head_64.S | 19 ++++++--
arch/x86/kernel/setup_percpu.c | 93 +++++++++++-----------------------------
arch/x86/kernel/smpboot.c | 53 ----------------------
arch/x86/xen/enlighten.c | 10 ++++
arch/x86/xen/smp.c | 11 +---
include/asm-x86/desc.h | 5 ++
include/asm-x86/pda.h | 3 -
include/asm-x86/percpu.h | 13 -----
include/asm-x86/setup.h | 1
include/asm-x86/smp.h | 2
include/asm-x86/trampoline.h | 1
13 files changed, 72 insertions(+), 172 deletions(-)
--- linux-2.6.tip.orig/arch/x86/kernel/cpu/common_64.c
+++ linux-2.6.tip/arch/x86/kernel/cpu/common_64.c
@@ -418,8 +418,8 @@ __setup("clearcpuid=", setup_disablecpui
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
--- linux-2.6.tip.orig/arch/x86/kernel/head64.c
+++ linux-2.6.tip/arch/x86/kernel/head64.c
@@ -25,27 +25,6 @@
#include <asm/e820.h>
#include <asm/bios_ebda.h>
-/* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
-
-#ifdef CONFIG_SMP
-/*
- * We install an empty cpu_pda pointer table to indicate to early users
- * (numa_set_node) that the cpu_pda pointer table for cpus other than
- * the boot cpu is not yet setup.
- */
-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
-#else
-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
-#endif
-
-void __init x86_64_init_pda(void)
-{
- _cpu_pda = __cpu_pda;
- cpu_pda(0) = &_boot_cpu_pda;
- pda_init(0);
-}
-
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -98,6 +77,10 @@ void __init x86_64_start_kernel(char * r
/* Cleanup the over mapped high alias */
cleanup_highmap();
+ /* Initialize boot cpu_pda data */
+ /* (See head_64.S for earlier pda/gdt initialization) */
+ pda_init(0);
+
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
@@ -109,10 +92,6 @@ void __init x86_64_start_kernel(char * r
early_printk("Kernel alive\n");
- x86_64_init_pda();
-
- early_printk("Kernel really alive\n");
-
x86_64_start_reservations(real_mode_data);
}
--- linux-2.6.tip.orig/arch/x86/kernel/head_64.S
+++ linux-2.6.tip/arch/x86/kernel/head_64.S
@@ -248,14 +248,21 @@ ENTRY(secondary_startup_64)
movl %eax,%gs
/*
- * Setup up a dummy PDA. this is just for some early bootup code
- * that does in_interrupt()
+ * Setup up the real PDA.
+ *
+ * For SMP, the boot cpu (0) uses the static pda which is the first
+ * element in the percpu area (@__per_cpu_load). This pda is moved
+ * to the real percpu area once that is allocated. Secondary cpus
+ * will use the initial_pda value setup in do_boot_cpu().
*/
movl $MSR_GS_BASE,%ecx
- movq $empty_zero_page,%rax
+ movq initial_pda(%rip), %rax
movq %rax,%rdx
shrq $32,%rdx
wrmsr
+#ifdef CONFIG_SMP
+ movq %rax, %gs:pda_data_offset
+#endif
/* esi is pointer to real mode structure with interesting info.
pass it to C */
@@ -278,6 +285,12 @@ ENTRY(secondary_startup_64)
.align 8
ENTRY(initial_code)
.quad x86_64_start_kernel
+ ENTRY(initial_pda)
+#ifdef CONFIG_SMP
+ .quad __per_cpu_load # Overwritten for secondary CPUs
+#else
+ .quad per_cpu__pda
+#endif
__FINITDATA
ENTRY(stack_start)
--- linux-2.6.tip.orig/arch/x86/kernel/setup_percpu.c
+++ linux-2.6.tip/arch/x86/kernel/setup_percpu.c
@@ -134,56 +134,8 @@ unsigned long __per_cpu_offset[NR_CPUS]
#endif
EXPORT_SYMBOL(__per_cpu_offset);
-#if !defined(CONFIG_SMP) || !defined(CONFIG_X86_64)
-static inline void setup_cpu_pda_map(void) { }
-
-#else /* CONFIG_SMP && CONFIG_X86_64 */
-
-/*
- * Allocate cpu_pda pointer table and array via alloc_bootmem.
- */
-static void __init setup_cpu_pda_map(void)
-{
- char *pda;
- struct x8664_pda **new_cpu_pda;
- unsigned long size;
- int cpu;
-
- size = roundup(sizeof(struct x8664_pda), cache_line_size());
-
- /* allocate cpu_pda array and pointer table */
- {
- unsigned long tsize = nr_cpu_ids * sizeof(void *);
- unsigned long asize = size * (nr_cpu_ids - 1);
-
- tsize = roundup(tsize, cache_line_size());
- new_cpu_pda = alloc_bootmem(tsize + asize);
- pda = (char *)new_cpu_pda + tsize;
- }
-
- /* initialize pointer table to static pda's */
- for_each_possible_cpu(cpu) {
- if (cpu == 0) {
- /* leave boot cpu pda in place */
- new_cpu_pda[0] = cpu_pda(0);
- DBG("cpu %4d pda %p\n", cpu, cpu_pda(0));
- continue;
- }
- DBG("cpu %4d pda %p\n", cpu, pda);
- new_cpu_pda[cpu] = (struct x8664_pda *)pda;
- new_cpu_pda[cpu]->in_bootmem = 1;
- pda += size;
- }
-
- /* point to new pointer table */
- _cpu_pda = new_cpu_pda;
-}
-#endif
-
/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
+ * Allocate and initialize the per cpu areas which include the PDAs.
*/
void __init setup_per_cpu_areas(void)
{
@@ -191,16 +143,11 @@ void __init setup_per_cpu_areas(void)
char *ptr;
int cpu;
- /* Setup cpu_pda map */
- setup_cpu_pda_map();
-
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size);
- DBG("PERCPU: __per_cpu_start %p\n", __per_cpu_start);
-
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = alloc_bootmem_pages(size);
@@ -215,26 +162,38 @@ void __init setup_per_cpu_areas(void)
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
- DBG("PERCPU: cpu %4d %p pda %p %p\n",
- cpu, ptr, _cpu_pda[cpu], cpu_pda(cpu));
-
/* Initialize each cpu's per_cpu area and save pointer */
memcpy(ptr, __per_cpu_load, __per_cpu_size);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
-#ifdef CONFIG_X86_64
- /* save for __my_cpu_offset() */
- cpu_pda(cpu)->data_offset = (unsigned long)ptr;
+ DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
+#ifdef CONFIG_X86_64
/*
- * The boot cpu gdt page must be reloaded as we moved it
- * from the static per cpu area to the newly allocated area.
+ * Note the boot cpu (0) has been using the static per_cpu load
+ * area for it's pda. We need to zero out the pdas for the
+ * other cpus that are coming online.
+ *
+ * Additionally, for the boot cpu the gdt page must be reloaded
+ * as we moved it from the static per cpu area to the newly
+ * allocated area.
*/
- if (cpu == 0) {
- struct desc_ptr gdt_descr = early_gdt_descr;
-
- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
- native_load_gdt(&gdt_descr);
+ {
+ /* We rely on the fact that pda is the first element */
+ struct x8664_pda *pda = (struct x8664_pda *)ptr;
+
+ if (cpu) {
+ memset(pda, 0, sizeof(*pda));
+ pda->data_offset = (unsigned long)ptr;
+ } else {
+ struct desc_ptr gdt_descr = early_gdt_descr;
+
+ pda->data_offset = (unsigned long)ptr;
+ gdt_descr.address =
+ (unsigned long)get_cpu_gdt_table(0);
+ native_load_gdt(&gdt_descr);
+ pda_init(0);
+ }
}
#endif
}
--- linux-2.6.tip.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6.tip/arch/x86/kernel/smpboot.c
@@ -744,45 +744,6 @@ static void __cpuinit do_fork_idle(struc
complete(&c_idle->done);
}
-#ifdef CONFIG_X86_64
-/*
- * Allocate node local memory for the AP pda.
- *
- * Must be called after the _cpu_pda pointer table is initialized.
- */
-int __cpuinit get_local_pda(int cpu)
-{
- struct x8664_pda *oldpda, *newpda;
- unsigned long size = sizeof(struct x8664_pda);
- int node = cpu_to_node(cpu);
-
- if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
- return 0;
-
- oldpda = cpu_pda(cpu);
- newpda = kmalloc_node(size, GFP_ATOMIC, node);
- if (!newpda) {
- printk(KERN_ERR "Could not allocate node local PDA "
- "for CPU %d on node %d\n", cpu, node);
-
- if (oldpda)
- return 0; /* have a usable pda */
- else
- return -1;
- }
-
- if (oldpda) {
- memcpy(newpda, oldpda, size);
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, size);
- }
-
- newpda->in_bootmem = 0;
- cpu_pda(cpu) = newpda;
- return 0;
-}
-#endif /* CONFIG_X86_64 */
-
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -800,16 +761,6 @@ static int __cpuinit do_boot_cpu(int api
};
INIT_WORK(&c_idle.work, do_fork_idle);
-#ifdef CONFIG_X86_64
- /* Allocate node local memory for AP pdas */
- if (cpu > 0) {
- boot_error = get_local_pda(cpu);
- if (boot_error)
- goto restore_state;
- /* if can't get pda memory, can't start cpu */
- }
-#endif
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,6 +798,7 @@ do_rest:
#else
cpu_pda(cpu)->pcurrent = c_idle.idle;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+ initial_pda = (unsigned long)get_cpu_pda(cpu);
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
@@ -921,9 +873,6 @@ do_rest:
inquire_remote_apic(apicid);
}
}
-#ifdef CONFIG_X86_64
-restore_state:
-#endif
if (boot_error) {
/* Try to put things back the way they were before ... */
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
--- linux-2.6.tip.orig/arch/x86/xen/enlighten.c
+++ linux-2.6.tip/arch/x86/xen/enlighten.c
@@ -1748,8 +1748,18 @@ asmlinkage void __init xen_start_kernel(
#ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0;
+#if 0
+ /*
+ * FIXME: is the above still true?
+ * Also, x86_64_init_pda() has been removed...
+ * should anything replace it?
+ * (The offset for cpu_pda(0) is statically initialized
+ * to __per_cpu_load, while the remaining pda's come online
+ * in setup_per_cpu_areas().)
+ */
x86_64_init_pda();
#endif
+#endif
xen_smp_init();
--- linux-2.6.tip.orig/arch/x86/xen/smp.c
+++ linux-2.6.tip/arch/x86/xen/smp.c
@@ -285,13 +285,10 @@ static int __cpuinit xen_cpu_up(unsigned
#endif
#ifdef CONFIG_X86_64
- /* Allocate node local memory for AP pdas */
- WARN_ON(cpu == 0);
- if (cpu > 0) {
- rc = get_local_pda(cpu);
- if (rc)
- return rc;
- }
+ /*
+ * FIXME: I don't believe that calling get_local_pda() is
+ * required any more...?
+ */
#endif
#ifdef CONFIG_X86_32
--- linux-2.6.tip.orig/include/asm-x86/desc.h
+++ linux-2.6.tip/include/asm-x86/desc.h
@@ -41,6 +41,11 @@ static inline struct desc_struct *get_cp
#ifdef CONFIG_X86_64
+static inline struct x8664_pda *get_cpu_pda(unsigned int cpu)
+{
+ return &per_cpu(pda, cpu);
+}
+
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
--- linux-2.6.tip.orig/include/asm-x86/pda.h
+++ linux-2.6.tip/include/asm-x86/pda.h
@@ -37,10 +37,9 @@ struct x8664_pda {
unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
-extern struct x8664_pda **_cpu_pda;
extern void pda_init(int);
-#define cpu_pda(i) (_cpu_pda[i])
+#define cpu_pda(cpu) (&per_cpu(pda, cpu))
/*
* There is no fast way to get the base address of the PDA, all the accesses
--- linux-2.6.tip.orig/include/asm-x86/percpu.h
+++ linux-2.6.tip/include/asm-x86/percpu.h
@@ -3,20 +3,11 @@
#ifdef CONFIG_X86_64
#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
- in the PDA. Longer term the PDA and every per cpu variable
- should be just put into a single section and referenced directly
- from %gs */
-
-#ifdef CONFIG_SMP
#include <asm/pda.h>
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+/* Same as asm-generic/percpu.h */
+#ifdef CONFIG_SMP
#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
#endif
#include <asm-generic/percpu.h>
--- linux-2.6.tip.orig/include/asm-x86/setup.h
+++ linux-2.6.tip/include/asm-x86/setup.h
@@ -92,7 +92,6 @@ extern unsigned long init_pg_tables_star
extern unsigned long init_pg_tables_end;
#else
-void __init x86_64_init_pda(void);
void __init x86_64_start_kernel(char *real_mode);
void __init x86_64_start_reservations(char *real_mode_data);
--- linux-2.6.tip.orig/include/asm-x86/smp.h
+++ linux-2.6.tip/include/asm-x86/smp.h
@@ -25,8 +25,6 @@ extern cpumask_t cpu_callin_map;
extern void (*mtrr_hook)(void);
extern void zap_low_mappings(void);
-extern int __cpuinit get_local_pda(int cpu);
-
extern int smp_num_siblings;
extern unsigned int num_processors;
extern cpumask_t cpu_initialized;
--- linux-2.6.tip.orig/include/asm-x86/trampoline.h
+++ linux-2.6.tip/include/asm-x86/trampoline.h
@@ -12,6 +12,7 @@ extern unsigned char *trampoline_base;
extern unsigned long init_rsp;
extern unsigned long initial_code;
+extern unsigned long initial_pda;
#define TRAMPOLINE_BASE 0x6000
extern unsigned long setup_trampoline(void);
--
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-02-01 19:14 [PATCH 0/4] percpu: Optimize percpu accesses travis
@ 2008-02-01 19:14 ` travis
0 siblings, 0 replies; 119+ messages in thread
From: travis @ 2008-02-01 19:14 UTC (permalink / raw)
To: Andrew Morton, Andi Kleen, Ingo Molnar, Thomas Gleixner
Cc: Jeremy Fitzhardinge, Christoph Lameter, Jack Steiner, linux-mm,
linux-kernel
[-- Attachment #1: x86_64_fold_pda --]
[-- Type: text/plain, Size: 12490 bytes --]
* Declare the pda as a per cpu variable. This will move the pda area
to an address accessible by the x86_64 per cpu macros. Subtraction
of __per_cpu_start will make the offset based from the beginning
of the per cpu area. Since %gs is pointing to the pda, it will
then also point to the per cpu variables and can be accessed thusly:
%gs:[&per_cpu_xxxx - __per_cpu_start]
* The boot_pdas are only needed in head64.c so move the declaration
over there. And since the boot_cpu_pda is only used during
bootup and then copied to the per_cpu areas during init, it is
then removable. In addition, the initial cpu_pda pointer table
is reallocated to be the correct size for the number of cpus.
* Remove the code that allocates special pda data structures.
Since the percpu area is currently maintained for all possible
cpus then the pda regions will stay intact in case cpus are
hotplugged off and then back on.
* Relocate the x86_64 percpu variables to begin at zero. Then
we can directly use the x86_32 percpu operations. x86_32
offsets %fs by __per_cpu_start. x86_64 has %gs pointing
directly to the pda and the per cpu area thereby allowing
access to the pda with the x86_64 pda operations and access
to the per cpu variables using x86_32 percpu operations.
* Introduces a new DEFINE_PER_CPU_FIRST to locate the percpu
variable (cpu_pda in this case) at the beginning of the percpu
.data section.
* This also supports further integration of x86_32/64.
Based on linux-2.6.git + x86.git
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/Kconfig | 3 +
arch/x86/kernel/head64.c | 41 +++++++++++++++++++++++
arch/x86/kernel/setup64.c | 67 +++++++++++++++++++++++---------------
arch/x86/kernel/smpboot_64.c | 16 ---------
arch/x86/kernel/vmlinux_64.lds.S | 1
include/asm-generic/vmlinux.lds.h | 2 +
include/asm-x86/pda.h | 13 +++++--
include/asm-x86/percpu.h | 33 +++++++++++-------
include/linux/percpu.h | 9 ++++-
9 files changed, 126 insertions(+), 59 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -103,6 +103,9 @@ config GENERIC_TIME_VSYSCALL
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
+config HAVE_ZERO_BASED_PER_CPU
+ def_bool X86_64
+
config ARCH_SUPPORTS_OPROFILE
bool
default y
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
+#include <linux/bootmem.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -23,6 +24,12 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
+#ifdef CONFIG_SMP
+/* Only used before the per cpu areas are setup. */
+static struct x8664_pda boot_cpu_pda[NR_CPUS] __initdata;
+static struct x8664_pda *_cpu_pda_init[NR_CPUS] __initdata;
+#endif
+
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -99,8 +106,14 @@ void __init x86_64_start_kernel(char * r
early_printk("Kernel alive\n");
+#ifdef CONFIG_SMP
+ _cpu_pda = (void *)_cpu_pda_init;
for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
+#endif
+
+ /* setup percpu segment offset for cpu 0 */
+ cpu_pda(0)->data_offset = (unsigned long)__per_cpu_load;
pda_init(0);
copy_bootdata(__va(real_mode_data));
@@ -125,3 +138,31 @@ void __init x86_64_start_kernel(char * r
start_kernel();
}
+
+#ifdef CONFIG_SMP
+/*
+ * Remove initial boot_cpu_pda array and cpu_pda pointer table.
+ *
+ * This depends on setup_per_cpu_areas relocating the pda to the beginning
+ * of the per_cpu area so that (_cpu_pda[i] != &boot_cpu_pda[i]). If it
+ * is equal then the new pda has not been setup for this cpu, and the pda
+ * table will have a NULL address for this cpu.
+ */
+void __init x86_64_cleanup_pda(void)
+{
+ int i;
+
+ _cpu_pda = alloc_bootmem_low(nr_cpu_ids * sizeof(void *));
+
+ if (!_cpu_pda)
+ panic("Cannot allocate cpu pda table\n");
+
+ /* cpu_pda() now points to allocated cpu_pda_table */
+
+ for (i = 0; i < NR_CPUS; i++)
+ if (_cpu_pda_init[i] == &boot_cpu_pda[i])
+ cpu_pda(i) = NULL;
+ else
+ cpu_pda(i) = _cpu_pda_init[i];
+}
+#endif
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -32,9 +32,13 @@ struct boot_params boot_params;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+#ifdef CONFIG_SMP
+struct x8664_pda **_cpu_pda __read_mostly;
EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+#endif
+
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -95,22 +99,14 @@ static void __init setup_per_cpu_maps(vo
int cpu;
for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SMP
- if (per_cpu_offset(cpu)) {
-#endif
- per_cpu(x86_cpu_to_apicid, cpu) =
- x86_cpu_to_apicid_init[cpu];
- per_cpu(x86_bios_cpu_apicid, cpu) =
- x86_bios_cpu_apicid_init[cpu];
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ x86_cpu_to_apicid_init[cpu];
+
+ per_cpu(x86_bios_cpu_apicid, cpu) =
+ x86_bios_cpu_apicid_init[cpu];
#ifdef CONFIG_NUMA
- per_cpu(x86_cpu_to_node_map, cpu) =
- x86_cpu_to_node_map_init[cpu];
-#endif
-#ifdef CONFIG_SMP
- }
- else
- printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
- cpu);
+ per_cpu(x86_cpu_to_node_map, cpu) =
+ x86_cpu_to_node_map_init[cpu];
#endif
}
@@ -139,25 +135,46 @@ void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
- for_each_cpu_mask (i, cpu_possible_map) {
+ printk(KERN_INFO
+ "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+
+ for_each_possible_cpu(i) {
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ char *ptr = alloc_bootmem_pages(size);
+#else
char *ptr;
- if (!NODE_DATA(early_cpu_to_node(i))) {
- printk("cpu with no node %d, num_online_nodes %d\n",
- i, num_online_nodes());
+ if (NODE_DATA(early_cpu_to_node(i)))
+ ptr = alloc_bootmem_pages_node
+ (NODE_DATA(early_cpu_to_node(i)), size);
+
+ else {
+ printk(KERN_INFO
+ "cpu %d has no node, num_online_nodes %d\n",
+ i, num_online_nodes());
ptr = alloc_bootmem_pages(size);
- } else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
}
+#endif
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
+
+ memcpy(ptr, __per_cpu_load, __per_cpu_size);
+
+ /* Relocate the pda */
+ memcpy(ptr, cpu_pda(i), sizeof(struct x8664_pda));
+ cpu_pda(i) = (struct x8664_pda *)ptr;
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}
/* setup percpu data maps early */
setup_per_cpu_maps();
+
+ /* clean up early cpu_pda pointer array */
+ x86_64_cleanup_pda();
+
+ /* Fix up pda for this processor .... */
+ pda_init(0);
}
void pda_init(int cpu)
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -566,22 +566,6 @@ static int __cpuinit do_boot_cpu(int cpu
return -1;
}
- /* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof (struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
- }
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -16,6 +16,7 @@ jiffies_64 = jiffies;
_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
+ percpu PT_LOAD FLAGS(4); /* R__ */
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -347,6 +347,7 @@
percpu : { } :percpu \
__per_cpu_load = .; \
.data.percpu 0 : AT(__per_cpu_load - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
____per_cpu_size = .; \
@@ -358,6 +359,7 @@
. = ALIGN(align); \
__per_cpu_start = .; \
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
} \
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -38,11 +38,16 @@ struct x8664_pda {
unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
-extern struct x8664_pda *_cpu_pda[];
-extern struct x8664_pda boot_cpu_pda[];
-extern void pda_init(int);
-
+#ifdef CONFIG_SMP
#define cpu_pda(i) (_cpu_pda[i])
+extern struct x8664_pda **_cpu_pda;
+extern void x86_64_cleanup_pda(void);
+#else
+#define cpu_pda(i) (&per_cpu(pda, i))
+static inline void x86_64_cleanup_pda(void) { }
+#endif
+
+extern void pda_init(int);
/*
* There is no fast way to get the base address of the PDA, all the accesses
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -13,13 +13,19 @@
#include <asm/pda.h>
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
#define per_cpu_offset(x) (__per_cpu_offset(x))
+#define __my_cpu_offset read_pda(data_offset)
+#define __percpu_seg "%%gs:"
+
+#else
+#define __percpu_seg ""
#endif
#include <asm-generic/percpu.h>
+/* Calculate the offset to use with the segment register */
+#define seg_offset(name) per_cpu_var(name)
+
DECLARE_PER_CPU(struct x8664_pda, pda);
#else /* CONFIG_X86_64 */
@@ -64,16 +70,11 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
* PER_CPU(cpu_gdt_descr, %ebx)
*/
#ifdef CONFIG_SMP
-
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"
-
#else /* !SMP */
-
#define __percpu_seg ""
-
#endif /* SMP */
#include <asm-generic/percpu.h>
@@ -81,6 +82,13 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#define seg_offset(name) per_cpu_var(name)
+
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
@@ -132,11 +140,10 @@ extern void __bad_percpu_size(void);
} \
ret__; })
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#define x86_read_percpu(var) percpu_from_op("mov", seg_offset(var))
+#define x86_write_percpu(var,val) percpu_to_op("mov", seg_offset(var), val)
+#define x86_add_percpu(var,val) percpu_to_op("add", seg_offset(var), val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", seg_offset(var), val)
+#define x86_or_percpu(var,val) percpu_to_op("or", seg_offset(var), val)
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#endif /* _ASM_X86_PERCPU_H_ */
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -18,11 +18,18 @@
__attribute__((__section__(".data.percpu.shared_aligned"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ __attribute__((__section__(".data.percpu.first"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
#define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
DEFINE_PER_CPU(type, name)
#endif
--
^ permalink raw reply [flat|nested] 119+ messages in thread
* [PATCH 3/4] x86_64: Fold pda into per cpu area
@ 2008-02-01 19:14 ` travis
0 siblings, 0 replies; 119+ messages in thread
From: travis @ 2008-02-01 19:14 UTC (permalink / raw)
To: Andrew Morton, Andi Kleen, Ingo Molnar, Thomas Gleixner
Cc: Jeremy Fitzhardinge, Christoph Lameter, Jack Steiner, linux-mm,
linux-kernel
[-- Attachment #1: x86_64_fold_pda --]
[-- Type: text/plain, Size: 12362 bytes --]
%gs:[&per_cpu_xxxx - __per_cpu_start]
* The boot_pdas are only needed in head64.c so move the declaration
over there. And since the boot_cpu_pda is only used during
bootup and then copied to the per_cpu areas during init, it is
then removable. In addition, the initial cpu_pda pointer table
is reallocated to be the correct size for the number of cpus.
* Remove the code that allocates special pda data structures.
Since the percpu area is currently maintained for all possible
cpus, the pda regions will stay intact in case cpus are
hotplugged off and then back on.
* Relocate the x86_64 percpu variables to begin at zero. Then
we can directly use the x86_32 percpu operations. x86_32
offsets %fs by __per_cpu_start. x86_64 has %gs pointing
directly to the pda and the per cpu area thereby allowing
access to the pda with the x86_64 pda operations and access
to the per cpu variables using x86_32 percpu operations.
* Introduces a new DEFINE_PER_CPU_FIRST to locate the percpu
variable (cpu_pda in this case) at the beginning of the percpu
.data section.
* This also supports further integration of x86_32/64.
Based on linux-2.6.git + x86.git
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/Kconfig | 3 +
arch/x86/kernel/head64.c | 41 +++++++++++++++++++++++
arch/x86/kernel/setup64.c | 67 +++++++++++++++++++++++---------------
arch/x86/kernel/smpboot_64.c | 16 ---------
arch/x86/kernel/vmlinux_64.lds.S | 1
include/asm-generic/vmlinux.lds.h | 2 +
include/asm-x86/pda.h | 13 +++++--
include/asm-x86/percpu.h | 33 +++++++++++-------
include/linux/percpu.h | 9 ++++-
9 files changed, 126 insertions(+), 59 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -103,6 +103,9 @@ config GENERIC_TIME_VSYSCALL
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
+config HAVE_ZERO_BASED_PER_CPU
+ def_bool X86_64
+
config ARCH_SUPPORTS_OPROFILE
bool
default y
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
+#include <linux/bootmem.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -23,6 +24,12 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
+#ifdef CONFIG_SMP
+/* Only used before the per cpu areas are setup. */
+static struct x8664_pda boot_cpu_pda[NR_CPUS] __initdata;
+static struct x8664_pda *_cpu_pda_init[NR_CPUS] __initdata;
+#endif
+
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -99,8 +106,14 @@ void __init x86_64_start_kernel(char * r
early_printk("Kernel alive\n");
+#ifdef CONFIG_SMP
+ _cpu_pda = (void *)_cpu_pda_init;
for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
+#endif
+
+ /* setup percpu segment offset for cpu 0 */
+ cpu_pda(0)->data_offset = (unsigned long)__per_cpu_load;
pda_init(0);
copy_bootdata(__va(real_mode_data));
@@ -125,3 +138,31 @@ void __init x86_64_start_kernel(char * r
start_kernel();
}
+
+#ifdef CONFIG_SMP
+/*
+ * Remove initial boot_cpu_pda array and cpu_pda pointer table.
+ *
+ * This depends on setup_per_cpu_areas relocating the pda to the beginning
+ * of the per_cpu area so that (_cpu_pda[i] != &boot_cpu_pda[i]). If it
+ * is equal then the new pda has not been setup for this cpu, and the pda
+ * table will have a NULL address for this cpu.
+ */
+void __init x86_64_cleanup_pda(void)
+{
+ int i;
+
+ _cpu_pda = alloc_bootmem_low(nr_cpu_ids * sizeof(void *));
+
+ if (!_cpu_pda)
+ panic("Cannot allocate cpu pda table\n");
+
+ /* cpu_pda() now points to allocated cpu_pda_table */
+
+ for (i = 0; i < NR_CPUS; i++)
+ if (_cpu_pda_init[i] == &boot_cpu_pda[i])
+ cpu_pda(i) = NULL;
+ else
+ cpu_pda(i) = _cpu_pda_init[i];
+}
+#endif
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -32,9 +32,13 @@ struct boot_params boot_params;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+#ifdef CONFIG_SMP
+struct x8664_pda **_cpu_pda __read_mostly;
EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+#endif
+
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -95,22 +99,14 @@ static void __init setup_per_cpu_maps(vo
int cpu;
for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SMP
- if (per_cpu_offset(cpu)) {
-#endif
- per_cpu(x86_cpu_to_apicid, cpu) =
- x86_cpu_to_apicid_init[cpu];
- per_cpu(x86_bios_cpu_apicid, cpu) =
- x86_bios_cpu_apicid_init[cpu];
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ x86_cpu_to_apicid_init[cpu];
+
+ per_cpu(x86_bios_cpu_apicid, cpu) =
+ x86_bios_cpu_apicid_init[cpu];
#ifdef CONFIG_NUMA
- per_cpu(x86_cpu_to_node_map, cpu) =
- x86_cpu_to_node_map_init[cpu];
-#endif
-#ifdef CONFIG_SMP
- }
- else
- printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
- cpu);
+ per_cpu(x86_cpu_to_node_map, cpu) =
+ x86_cpu_to_node_map_init[cpu];
#endif
}
@@ -139,25 +135,46 @@ void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
- for_each_cpu_mask (i, cpu_possible_map) {
+ printk(KERN_INFO
+ "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+
+ for_each_possible_cpu(i) {
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ char *ptr = alloc_bootmem_pages(size);
+#else
char *ptr;
- if (!NODE_DATA(early_cpu_to_node(i))) {
- printk("cpu with no node %d, num_online_nodes %d\n",
- i, num_online_nodes());
+ if (NODE_DATA(early_cpu_to_node(i)))
+ ptr = alloc_bootmem_pages_node
+ (NODE_DATA(early_cpu_to_node(i)), size);
+
+ else {
+ printk(KERN_INFO
+ "cpu %d has no node, num_online_nodes %d\n",
+ i, num_online_nodes());
ptr = alloc_bootmem_pages(size);
- } else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
}
+#endif
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
+
+ memcpy(ptr, __per_cpu_load, __per_cpu_size);
+
+ /* Relocate the pda */
+ memcpy(ptr, cpu_pda(i), sizeof(struct x8664_pda));
+ cpu_pda(i) = (struct x8664_pda *)ptr;
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}
/* setup percpu data maps early */
setup_per_cpu_maps();
+
+ /* clean up early cpu_pda pointer array */
+ x86_64_cleanup_pda();
+
+ /* Fix up pda for this processor .... */
+ pda_init(0);
}
void pda_init(int cpu)
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -566,22 +566,6 @@ static int __cpuinit do_boot_cpu(int cpu
return -1;
}
- /* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof (struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
- }
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -16,6 +16,7 @@ jiffies_64 = jiffies;
_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
+ percpu PT_LOAD FLAGS(4); /* R__ */
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -347,6 +347,7 @@
percpu : { } :percpu \
__per_cpu_load = .; \
.data.percpu 0 : AT(__per_cpu_load - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
____per_cpu_size = .; \
@@ -358,6 +359,7 @@
. = ALIGN(align); \
__per_cpu_start = .; \
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
} \
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -38,11 +38,16 @@ struct x8664_pda {
unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
-extern struct x8664_pda *_cpu_pda[];
-extern struct x8664_pda boot_cpu_pda[];
-extern void pda_init(int);
-
+#ifdef CONFIG_SMP
#define cpu_pda(i) (_cpu_pda[i])
+extern struct x8664_pda **_cpu_pda;
+extern void x86_64_cleanup_pda(void);
+#else
+#define cpu_pda(i) (&per_cpu(pda, i))
+static inline void x86_64_cleanup_pda(void) { }
+#endif
+
+extern void pda_init(int);
/*
* There is no fast way to get the base address of the PDA, all the accesses
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -13,13 +13,19 @@
#include <asm/pda.h>
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
#define per_cpu_offset(x) (__per_cpu_offset(x))
+#define __my_cpu_offset read_pda(data_offset)
+#define __percpu_seg "%%gs:"
+
+#else
+#define __percpu_seg ""
#endif
#include <asm-generic/percpu.h>
+/* Calculate the offset to use with the segment register */
+#define seg_offset(name) per_cpu_var(name)
+
DECLARE_PER_CPU(struct x8664_pda, pda);
#else /* CONFIG_X86_64 */
@@ -64,16 +70,11 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
* PER_CPU(cpu_gdt_descr, %ebx)
*/
#ifdef CONFIG_SMP
-
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"
-
#else /* !SMP */
-
#define __percpu_seg ""
-
#endif /* SMP */
#include <asm-generic/percpu.h>
@@ -81,6 +82,13 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#define seg_offset(name) per_cpu_var(name)
+
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
@@ -132,11 +140,10 @@ extern void __bad_percpu_size(void);
} \
ret__; })
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#define x86_read_percpu(var) percpu_from_op("mov", seg_offset(var))
+#define x86_write_percpu(var,val) percpu_to_op("mov", seg_offset(var), val)
+#define x86_add_percpu(var,val) percpu_to_op("add", seg_offset(var), val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", seg_offset(var), val)
+#define x86_or_percpu(var,val) percpu_to_op("or", seg_offset(var), val)
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#endif /* _ASM_X86_PERCPU_H_ */
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -18,11 +18,18 @@
__attribute__((__section__(".data.percpu.shared_aligned"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ __attribute__((__section__(".data.percpu.first"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
#define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
DEFINE_PER_CPU(type, name)
#endif
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-02-01 19:14 ` travis
@ 2008-02-15 20:16 ` Ingo Molnar
-1 siblings, 0 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-02-15 20:16 UTC (permalink / raw)
To: travis
Cc: Andrew Morton, Andi Kleen, Thomas Gleixner, Jeremy Fitzhardinge,
Christoph Lameter, Jack Steiner, linux-mm, linux-kernel
* travis@sgi.com <travis@sgi.com> wrote:
> include/asm-generic/vmlinux.lds.h | 2 +
> include/linux/percpu.h | 9 ++++-
Couldn't these two generic bits be done separately (perhaps as a
preparatory but otherwise NOP patch pushed upstream straight away) to
make subsequent patches only touch x86 architecture files?
Ingo
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
@ 2008-02-15 20:16 ` Ingo Molnar
0 siblings, 0 replies; 119+ messages in thread
From: Ingo Molnar @ 2008-02-15 20:16 UTC (permalink / raw)
To: travis
Cc: Andrew Morton, Andi Kleen, Thomas Gleixner, Jeremy Fitzhardinge,
Christoph Lameter, Jack Steiner, linux-mm, linux-kernel
* travis@sgi.com <travis@sgi.com> wrote:
> include/asm-generic/vmlinux.lds.h | 2 +
> include/linux/percpu.h | 9 ++++-
Couldn't these two generic bits be done separately (perhaps as a
preparatory but otherwise NOP patch pushed upstream straight away) to
make subsequent patches only touch x86 architecture files?
Ingo
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-02-15 20:16 ` Ingo Molnar
@ 2008-02-15 22:43 ` Christoph Lameter
-1 siblings, 0 replies; 119+ messages in thread
From: Christoph Lameter @ 2008-02-15 22:43 UTC (permalink / raw)
To: Ingo Molnar
Cc: travis, Andrew Morton, Andi Kleen, Thomas Gleixner,
Jeremy Fitzhardinge, Jack Steiner, linux-mm, linux-kernel
On Fri, 15 Feb 2008, Ingo Molnar wrote:
>
> * travis@sgi.com <travis@sgi.com> wrote:
>
> > include/asm-generic/vmlinux.lds.h | 2 +
> > include/linux/percpu.h | 9 ++++-
>
> couldnt these two generic bits be done separately (perhaps a preparatory
> but otherwise NOP patch pushed upstream straight away) to make
> subsequent patches only touch x86 architecture files?
Yes, those modifications could be folded into the generic patch for
zero-based percpu configurations.
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
@ 2008-02-15 22:43 ` Christoph Lameter
0 siblings, 0 replies; 119+ messages in thread
From: Christoph Lameter @ 2008-02-15 22:43 UTC (permalink / raw)
To: Ingo Molnar
Cc: travis, Andrew Morton, Andi Kleen, Thomas Gleixner,
Jeremy Fitzhardinge, Jack Steiner, linux-mm, linux-kernel
On Fri, 15 Feb 2008, Ingo Molnar wrote:
>
> * travis@sgi.com <travis@sgi.com> wrote:
>
> > include/asm-generic/vmlinux.lds.h | 2 +
> > include/linux/percpu.h | 9 ++++-
>
> couldnt these two generic bits be done separately (perhaps a preparatory
> but otherwise NOP patch pushed upstream straight away) to make
> subsequent patches only touch x86 architecture files?
Yes, those modifications could be folded into the generic patch for
zero-based percpu configurations.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-02-15 20:16 ` Ingo Molnar
@ 2008-02-17 6:22 ` Yinghai Lu
-1 siblings, 0 replies; 119+ messages in thread
From: Yinghai Lu @ 2008-02-17 6:22 UTC (permalink / raw)
To: Ingo Molnar
Cc: travis, Andrew Morton, Andi Kleen, Thomas Gleixner,
Jeremy Fitzhardinge, Christoph Lameter, Jack Steiner, linux-mm,
linux-kernel
On Feb 15, 2008 12:16 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * travis@sgi.com <travis@sgi.com> wrote:
>
> > include/asm-generic/vmlinux.lds.h | 2 +
> > include/linux/percpu.h | 9 ++++-
>
> couldnt these two generic bits be done separately (perhaps a preparatory
> but otherwise NOP patch pushed upstream straight away) to make
> subsequent patches only touch x86 architecture files?
This patch needs to be applied to mainline ASAP,
or else the patch that changed include/asm-x86/percpu.h needs to be reverted:
+#ifdef CONFIG_X86_64
+#include <linux/compiler.h>
+
+/* Same as asm-generic/percpu.h, except that we store the per cpu offset
+ in the PDA. Longer term the PDA and every per cpu variable
+ should be just put into a single section and referenced directly
+ from %gs */
+
+#ifdef CONFIG_SMP
+#include <asm/pda.h>
+
+#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+#define __my_cpu_offset read_pda(data_offset)
+
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
#endif
+#include <asm-generic/percpu.h>
+
+DECLARE_PER_CPU(struct x8664_pda, pda);
+
+#else /* CONFIG_X86_64 */
because in the current tree, setup_per_cpu_areas will have
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
but at that time all APs are still using the boot cpu's cpu_pda, and the
APs only get their own pda in do_boot_cpu().
The result is that all cpus will have the same data_offset, so they will
share one per_cpu_data... that is totally wrong!!
That could explain a lot of the strange panics reported recently about NUMA...
YH
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
@ 2008-02-17 6:22 ` Yinghai Lu
0 siblings, 0 replies; 119+ messages in thread
From: Yinghai Lu @ 2008-02-17 6:22 UTC (permalink / raw)
To: Ingo Molnar
Cc: travis, Andrew Morton, Andi Kleen, Thomas Gleixner,
Jeremy Fitzhardinge, Christoph Lameter, Jack Steiner, linux-mm,
linux-kernel
On Feb 15, 2008 12:16 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * travis@sgi.com <travis@sgi.com> wrote:
>
> > include/asm-generic/vmlinux.lds.h | 2 +
> > include/linux/percpu.h | 9 ++++-
>
> couldnt these two generic bits be done separately (perhaps a preparatory
> but otherwise NOP patch pushed upstream straight away) to make
> subsequent patches only touch x86 architecture files?
This patch needs to be applied to mainline ASAP,
or else the patch that changed include/asm-x86/percpu.h needs to be reverted:
+#ifdef CONFIG_X86_64
+#include <linux/compiler.h>
+
+/* Same as asm-generic/percpu.h, except that we store the per cpu offset
+ in the PDA. Longer term the PDA and every per cpu variable
+ should be just put into a single section and referenced directly
+ from %gs */
+
+#ifdef CONFIG_SMP
+#include <asm/pda.h>
+
+#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+#define __my_cpu_offset read_pda(data_offset)
+
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
#endif
+#include <asm-generic/percpu.h>
+
+DECLARE_PER_CPU(struct x8664_pda, pda);
+
+#else /* CONFIG_X86_64 */
because in the current tree, setup_per_cpu_areas will have
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
but at that time all APs are still using the boot cpu's cpu_pda, and the
APs only get their own pda in do_boot_cpu().
The result is that all cpus will have the same data_offset, so they will
share one per_cpu_data... that is totally wrong!!
That could explain a lot of the strange panics reported recently about NUMA...
YH
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-02-17 6:22 ` Yinghai Lu
@ 2008-02-17 7:36 ` Yinghai Lu
-1 siblings, 0 replies; 119+ messages in thread
From: Yinghai Lu @ 2008-02-17 7:36 UTC (permalink / raw)
To: Ingo Molnar
Cc: travis, Andrew Morton, Andi Kleen, Thomas Gleixner,
Jeremy Fitzhardinge, Christoph Lameter, Jack Steiner, linux-mm,
linux-kernel
On Feb 16, 2008 10:22 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Feb 15, 2008 12:16 PM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > * travis@sgi.com <travis@sgi.com> wrote:
> >
> > > include/asm-generic/vmlinux.lds.h | 2 +
> > > include/linux/percpu.h | 9 ++++-
> >
> > couldnt these two generic bits be done separately (perhaps a preparatory
> > but otherwise NOP patch pushed upstream straight away) to make
> > subsequent patches only touch x86 architecture files?
>
> this patch need to apply to mainline asap.
>
> or you need revert to the patch about include/asm-x86/percpu.h
>
> +#ifdef CONFIG_X86_64
> +#include <linux/compiler.h>
> +
> +/* Same as asm-generic/percpu.h, except that we store the per cpu offset
> + in the PDA. Longer term the PDA and every per cpu variable
> + should be just put into a single section and referenced directly
> + from %gs */
> +
> +#ifdef CONFIG_SMP
> +#include <asm/pda.h>
> +
> +#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
> +#define __my_cpu_offset read_pda(data_offset)
> +
> +#define per_cpu_offset(x) (__per_cpu_offset(x))
> +
> #endif
> +#include <asm-generic/percpu.h>
> +
> +DECLARE_PER_CPU(struct x8664_pda, pda);
> +
> +#else /* CONFIG_X86_64 */
>
> because current tree
> in setup_per_cpu_areas will have
> cpu_pda(i)->data_offset = ptr - __per_cpu_start;
>
> but at that time all APs will use cpu_pda for boot cpu...,and APs will
> get their pda in do_boot_cpu()
Sorry, boot_cpu_pda is an array... so that is safe.
YH
^ permalink raw reply [flat|nested] 119+ messages in thread
* Re: [PATCH 3/4] x86_64: Fold pda into per cpu area
@ 2008-02-17 7:36 ` Yinghai Lu
0 siblings, 0 replies; 119+ messages in thread
From: Yinghai Lu @ 2008-02-17 7:36 UTC (permalink / raw)
To: Ingo Molnar
Cc: travis, Andrew Morton, Andi Kleen, Thomas Gleixner,
Jeremy Fitzhardinge, Christoph Lameter, Jack Steiner, linux-mm,
linux-kernel
On Feb 16, 2008 10:22 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Feb 15, 2008 12:16 PM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > * travis@sgi.com <travis@sgi.com> wrote:
> >
> > > include/asm-generic/vmlinux.lds.h | 2 +
> > > include/linux/percpu.h | 9 ++++-
> >
> > couldnt these two generic bits be done separately (perhaps a preparatory
> > but otherwise NOP patch pushed upstream straight away) to make
> > subsequent patches only touch x86 architecture files?
>
> this patch need to apply to mainline asap.
>
> or you need revert to the patch about include/asm-x86/percpu.h
>
> +#ifdef CONFIG_X86_64
> +#include <linux/compiler.h>
> +
> +/* Same as asm-generic/percpu.h, except that we store the per cpu offset
> + in the PDA. Longer term the PDA and every per cpu variable
> + should be just put into a single section and referenced directly
> + from %gs */
> +
> +#ifdef CONFIG_SMP
> +#include <asm/pda.h>
> +
> +#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
> +#define __my_cpu_offset read_pda(data_offset)
> +
> +#define per_cpu_offset(x) (__per_cpu_offset(x))
> +
> #endif
> +#include <asm-generic/percpu.h>
> +
> +DECLARE_PER_CPU(struct x8664_pda, pda);
> +
> +#else /* CONFIG_X86_64 */
>
> because current tree
> in setup_per_cpu_areas will have
> cpu_pda(i)->data_offset = ptr - __per_cpu_start;
>
> but at that time all APs will use cpu_pda for boot cpu...,and APs will
> get their pda in do_boot_cpu()
Sorry, boot_cpu_pda is an array... so that is safe.
YH
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 119+ messages in thread