* [PATCH 1/4] x86_64: Cleanup early setup_percpu references
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
@ 2008-07-25 21:11 ` Mike Travis
2008-07-25 21:11 ` [PATCH 2/4] x86_64: Base percpu variables at zero Mike Travis
` (5 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Mike Travis @ 2008-07-25 21:11 UTC (permalink / raw)
To: Ingo Molnar, Andrew Morton
Cc: Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel
[-- Attachment #1: cleanup_percpu --]
[-- Type: text/plain, Size: 5280 bytes --]
* Ruggedize some calls in setup_percpu.c to prevent mishaps
in early calls, particularly for non-critical functions.
* Cleanup DEBUG_PER_CPU_MAPS usages and some comments.
Based on linux-2.6.tip/master with following patches applied:
cpumask: Make cpumask_of_cpu_map generic
cpumask: Put cpumask_of_cpu_map in the initdata section
cpumask: Change cpumask_of_cpu_ptr to use new cpumask_of_cpu
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/kernel/setup_percpu.c | 66 +++++++++++++++++++++++++++++------------
1 file changed, 47 insertions(+), 19 deletions(-)
--- linux-2.6.tip.orig/arch/x86/kernel/setup_percpu.c
+++ linux-2.6.tip/arch/x86/kernel/setup_percpu.c
@@ -15,6 +15,12 @@
#include <asm/apicdef.h>
#include <asm/highmem.h>
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+# define DBG(x...) printk(KERN_DEBUG x)
+#else
+# define DBG(x...)
+#endif
+
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
@@ -27,31 +33,39 @@ EXPORT_SYMBOL(boot_cpu_physical_apicid);
physid_mask_t phys_cpu_present_map;
#endif
-/* map cpu index to physical APIC ID */
+/*
+ * Map cpu index to physical APIC ID
+ */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
-#define X86_64_NUMA 1
+#define X86_64_NUMA 1 /* (used later) */
-/* map cpu index to node index */
+/*
+ * Map cpu index to node index
+ */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-/* which logical CPUs are on which nodes */
+/*
+ * Which logical CPUs are on which nodes
+ */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);
-/* setup node_to_cpumask_map */
+/*
+ * Setup node_to_cpumask_map
+ */
static void __init setup_node_to_cpumask_map(void);
#else
static inline void setup_node_to_cpumask_map(void) { }
#endif
-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
* Copy data used in early init routines from the initial arrays to the
* per cpu data areas. These arrays then become expendable and the
@@ -90,11 +104,16 @@ static void __init setup_per_cpu_maps(vo
static void __init setup_cpumask_of_cpu(void)
{
int i;
+ cpumask_t *new_map;
/* alloc_bootmem zeroes memory */
- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
+ new_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
+ DBG("cpumask_of_cpu_map at %p\n", new_map);
+
for (i = 0; i < nr_cpu_ids; i++)
- cpu_set(i, cpumask_of_cpu_map[i]);
+ cpu_set(i, new_map[i]);
+
+ cpumask_of_cpu_map = (const cpumask_t *)new_map;
}
#else
static inline void setup_cpumask_of_cpu(void) { }
@@ -189,9 +208,10 @@ void __init setup_per_cpu_areas(void)
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}
- printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
+ printk(KERN_INFO "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids: %d\n",
NR_CPUS, nr_cpu_ids, nr_node_ids);
/* Setup percpu data maps */
@@ -213,6 +233,7 @@ void __init setup_per_cpu_areas(void)
* Requires node_possible_map to be valid.
*
* Note: node_to_cpumask() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
*/
static void __init setup_node_to_cpumask_map(void)
{
@@ -228,6 +249,7 @@ static void __init setup_node_to_cpumask
/* allocate the map */
map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+ DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
map, nr_node_ids);
@@ -240,17 +262,23 @@ void __cpuinit numa_set_node(int cpu, in
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
- if (cpu_pda(cpu) && node != NUMA_NO_NODE)
- cpu_pda(cpu)->nodenumber = node;
-
- if (cpu_to_node_map)
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
cpu_to_node_map[cpu] = node;
+ return;
+ }
- else if (per_cpu_offset(cpu))
- per_cpu(x86_cpu_to_node_map, cpu) = node;
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
- else
- pr_debug("Setting node for non-present cpu %d\n", cpu);
+ if (node != NUMA_NO_NODE)
+ cpu_pda(cpu)->nodenumber = node;
}
void __cpuinit numa_clear_node(int cpu)
@@ -267,7 +295,7 @@ void __cpuinit numa_add_cpu(int cpu)
void __cpuinit numa_remove_cpu(int cpu)
{
- cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
+ cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
#else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -277,7 +305,7 @@ void __cpuinit numa_remove_cpu(int cpu)
*/
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
- int node = cpu_to_node(cpu);
+ int node = early_cpu_to_node(cpu);
cpumask_t *mask;
char buf[64];
--
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 2/4] x86_64: Base percpu variables at zero
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
2008-07-25 21:11 ` [PATCH 1/4] x86_64: Cleanup early setup_percpu references Mike Travis
@ 2008-07-25 21:11 ` Mike Travis
2008-07-25 21:11 ` [PATCH 3/4] x86_64: Fold pda into per cpu area Mike Travis
` (4 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Mike Travis @ 2008-07-25 21:11 UTC (permalink / raw)
To: Ingo Molnar, Andrew Morton
Cc: Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel,
Christoph Lameter
[-- Attachment #1: zero_based_only --]
[-- Type: text/plain, Size: 6744 bytes --]
WARNING: There is still a FIXME in this patch (see arch/x86/kernel/acpi/sleep.c)
[Advice on how to fix it most welcome... ;-)]
* Make the x86_64 per cpu area start at zero.
* Relocate the per_cpu(gdt_page) in head_64.S for the boot cpu (0).
For secondary cpus, do_boot_cpu() sets up the correct gdt_page pointer.
* Initialize per_cpu_offset to point to static pda in the per_cpu area
(@ __per_cpu_load).
* After allocation of the per cpu area for the boot cpu (0), reload the
gdt page pointer.
Based on linux-2.6.tip/master
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/Kconfig | 3 ++
arch/x86/kernel/acpi/sleep.c | 9 ++++++++
arch/x86/kernel/head_64.S | 26 ++++++++++++++++++++++--
arch/x86/kernel/setup_percpu.c | 42 ++++++++++++++++++++++++++++++++-------
arch/x86/kernel/vmlinux_64.lds.S | 1
5 files changed, 72 insertions(+), 9 deletions(-)
--- linux-2.6.tip.orig/arch/x86/Kconfig
+++ linux-2.6.tip/arch/x86/Kconfig
@@ -129,6 +129,9 @@ config HAVE_SETUP_PER_CPU_AREA
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
+config HAVE_ZERO_BASED_PER_CPU
+ def_bool X86_64_SMP
+
config ARCH_HIBERNATION_POSSIBLE
def_bool y
depends on !SMP || !X86_VOYAGER
--- linux-2.6.tip.orig/arch/x86/kernel/acpi/sleep.c
+++ linux-2.6.tip/arch/x86/kernel/acpi/sleep.c
@@ -99,6 +99,15 @@ int acpi_save_state_mem(void)
#ifdef CONFIG_SMP
stack_start.sp = temp_stack + 4096;
#endif
+ /*
+ * FIXME: with zero-based percpu variables, the pda and gdt_page
+ * addresses must be offset by the base of this cpu's percpu area.
+ * Where/how should we do this?
+ *
+ * for secondary cpu startup in smpboot.c:do_boot_cpu() this is done:
+ * early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ * initial_pda = (unsigned long)get_cpu_pda(cpu);
+ */
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0;
#endif /* CONFIG_64BIT */
--- linux-2.6.tip.orig/arch/x86/kernel/head_64.S
+++ linux-2.6.tip/arch/x86/kernel/head_64.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
+#include <asm/asm-offsets.h>
#include <asm/desc.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
@@ -210,7 +211,27 @@ ENTRY(secondary_startup_64)
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt early_gdt_descr(%rip)
+
+#ifdef CONFIG_SMP
+ /*
+ * For zero-based percpu variables, the base (__per_cpu_load) must
+ * be added to the offset of per_cpu__gdt_page. This is only needed
+ * for the boot cpu but we can't do this prior to secondary_startup_64.
+ * So we use a NULL gdt adrs to indicate that we are starting up the
+ * boot cpu and not the secondary cpus. do_boot_cpu() will fixup
+ * the gdt adrs for those cpus.
+ */
+#define PER_CPU_GDT_PAGE 0
+ movq early_gdt_descr_base(%rip), %rax
+ testq %rax, %rax
+ jnz 1f
+ movq $__per_cpu_load, %rax
+ addq $per_cpu__gdt_page, %rax
+ movq %rax, early_gdt_descr_base(%rip)
+#else
+#define PER_CPU_GDT_PAGE per_cpu__gdt_page
+#endif
+1: lgdt early_gdt_descr(%rip)
/* set up data segments. actually 0 would do too */
movl $__KERNEL_DS,%eax
@@ -401,7 +422,8 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
- .quad per_cpu__gdt_page
+early_gdt_descr_base:
+ .quad PER_CPU_GDT_PAGE # Overwritten for secondary CPUs
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
--- linux-2.6.tip.orig/arch/x86/kernel/setup_percpu.c
+++ linux-2.6.tip/arch/x86/kernel/setup_percpu.c
@@ -14,6 +14,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>
+#include <asm/desc.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x)
@@ -119,16 +120,21 @@ static void __init setup_cpumask_of_cpu(
static inline void setup_cpumask_of_cpu(void) { }
#endif
-#ifdef CONFIG_X86_32
/*
- * Great future not-so-futuristic plan: make i386 and x86_64 do it
- * the same way
+ * Pointers to per cpu areas for each cpu
*/
+#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU
+
+/* Initialize percpu offset for boot cpu (0) */
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+ [0] = (unsigned long)__per_cpu_load
+};
+#else
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+#endif
EXPORT_SYMBOL(__per_cpu_offset);
-static inline void setup_cpu_pda_map(void) { }
-#elif !defined(CONFIG_SMP)
+#if !defined(CONFIG_SMP) || !defined(CONFIG_X86_64)
static inline void setup_cpu_pda_map(void) { }
#else /* CONFIG_SMP && CONFIG_X86_64 */
@@ -160,8 +166,10 @@ static void __init setup_cpu_pda_map(voi
if (cpu == 0) {
/* leave boot cpu pda in place */
new_cpu_pda[0] = cpu_pda(0);
+ DBG("cpu %4d pda %p\n", cpu, cpu_pda(0));
continue;
}
+ DBG("cpu %4d pda %p\n", cpu, pda);
new_cpu_pda[cpu] = (struct x8664_pda *)pda;
new_cpu_pda[cpu]->in_bootmem = 1;
pda += size;
@@ -191,6 +199,8 @@ void __init setup_per_cpu_areas(void)
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size);
+ DBG("PERCPU: __per_cpu_start %p\n", __per_cpu_start);
+
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = alloc_bootmem_pages(size);
@@ -205,10 +215,28 @@ void __init setup_per_cpu_areas(void)
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
+ DBG("PERCPU: cpu %4d %p pda %p %p\n",
+ cpu, ptr, _cpu_pda[cpu], cpu_pda(cpu));
+
+ /* Initialize each cpu's per_cpu area and save pointer */
+ memcpy(ptr, __per_cpu_load, __per_cpu_size);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
+#ifdef CONFIG_X86_64
+ /* save for __my_cpu_offset() */
+ cpu_pda(cpu)->data_offset = (unsigned long)ptr;
+
+ /*
+ * The boot cpu gdt page must be reloaded as we moved it
+ * from the static per cpu area to the newly allocated area.
+ */
+ if (cpu == 0) {
+ struct desc_ptr gdt_descr = early_gdt_descr;
+
+ gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+ native_load_gdt(&gdt_descr);
+ }
+#endif
}
printk(KERN_INFO "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids: %d\n",
--- linux-2.6.tip.orig/arch/x86/kernel/vmlinux_64.lds.S
+++ linux-2.6.tip/arch/x86/kernel/vmlinux_64.lds.S
@@ -16,6 +16,7 @@ jiffies_64 = jiffies;
_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
+ percpu PT_LOAD FLAGS(7); /* RWE */
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
--
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 3/4] x86_64: Fold pda into per cpu area
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
2008-07-25 21:11 ` [PATCH 1/4] x86_64: Cleanup early setup_percpu references Mike Travis
2008-07-25 21:11 ` [PATCH 2/4] x86_64: Base percpu variables at zero Mike Travis
@ 2008-07-25 21:11 ` Mike Travis
2008-07-25 21:11 ` [PATCH 4/4] x86_64: Reference zero-based percpu variables offset from gs Mike Travis
` (3 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Mike Travis @ 2008-07-25 21:11 UTC (permalink / raw)
To: Ingo Molnar, Andrew Morton
Cc: Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel,
Christoph Lameter
[-- Attachment #1: fold_pda_into_percpu --]
[-- Type: text/plain, Size: 13594 bytes --]
WARNING: there are two FIXME's in arch/x86/xen/enlighten.c
and arch/x86/xen/smp.c that I'm not sure how to handle...?
* Declare the pda as a per cpu variable.
* Relocate the initial pda in head_64.S for the boot cpu (0).
For secondary cpus, do_boot_cpu() sets up the correct initial pda.
Based on linux-2.6.tip/master
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Mike Travis <travis@sgi.com>
---
arch/x86/kernel/cpu/common_64.c | 4 -
arch/x86/kernel/head64.c | 29 +-----------
arch/x86/kernel/head_64.S | 19 ++++++--
arch/x86/kernel/setup_percpu.c | 93 +++++++++++-----------------------------
arch/x86/kernel/smpboot.c | 53 ----------------------
arch/x86/xen/enlighten.c | 10 ++++
arch/x86/xen/smp.c | 11 +---
include/asm-x86/desc.h | 5 ++
include/asm-x86/pda.h | 3 -
include/asm-x86/percpu.h | 13 -----
include/asm-x86/setup.h | 1
include/asm-x86/smp.h | 2
include/asm-x86/trampoline.h | 1
13 files changed, 72 insertions(+), 172 deletions(-)
--- linux-2.6.tip.orig/arch/x86/kernel/cpu/common_64.c
+++ linux-2.6.tip/arch/x86/kernel/cpu/common_64.c
@@ -418,8 +418,8 @@ __setup("clearcpuid=", setup_disablecpui
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
--- linux-2.6.tip.orig/arch/x86/kernel/head64.c
+++ linux-2.6.tip/arch/x86/kernel/head64.c
@@ -25,27 +25,6 @@
#include <asm/e820.h>
#include <asm/bios_ebda.h>
-/* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
-
-#ifdef CONFIG_SMP
-/*
- * We install an empty cpu_pda pointer table to indicate to early users
- * (numa_set_node) that the cpu_pda pointer table for cpus other than
- * the boot cpu is not yet setup.
- */
-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
-#else
-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
-#endif
-
-void __init x86_64_init_pda(void)
-{
- _cpu_pda = __cpu_pda;
- cpu_pda(0) = &_boot_cpu_pda;
- pda_init(0);
-}
-
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -98,6 +77,10 @@ void __init x86_64_start_kernel(char * r
/* Cleanup the over mapped high alias */
cleanup_highmap();
+ /* Initialize boot cpu_pda data */
+ /* (See head_64.S for earlier pda/gdt initialization) */
+ pda_init(0);
+
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
@@ -109,10 +92,6 @@ void __init x86_64_start_kernel(char * r
early_printk("Kernel alive\n");
- x86_64_init_pda();
-
- early_printk("Kernel really alive\n");
-
x86_64_start_reservations(real_mode_data);
}
--- linux-2.6.tip.orig/arch/x86/kernel/head_64.S
+++ linux-2.6.tip/arch/x86/kernel/head_64.S
@@ -248,14 +248,21 @@ ENTRY(secondary_startup_64)
movl %eax,%gs
/*
- * Setup up a dummy PDA. this is just for some early bootup code
- * that does in_interrupt()
+ * Set up the real PDA.
+ *
+ * For SMP, the boot cpu (0) uses the static pda which is the first
+ * element in the percpu area (@__per_cpu_load). This pda is moved
+ * to the real percpu area once that is allocated. Secondary cpus
+ * will use the initial_pda value setup in do_boot_cpu().
*/
movl $MSR_GS_BASE,%ecx
- movq $empty_zero_page,%rax
+ movq initial_pda(%rip), %rax
movq %rax,%rdx
shrq $32,%rdx
wrmsr
+#ifdef CONFIG_SMP
+ movq %rax, %gs:pda_data_offset
+#endif
/* esi is pointer to real mode structure with interesting info.
pass it to C */
@@ -278,6 +285,12 @@ ENTRY(secondary_startup_64)
.align 8
ENTRY(initial_code)
.quad x86_64_start_kernel
+ ENTRY(initial_pda)
+#ifdef CONFIG_SMP
+ .quad __per_cpu_load # Overwritten for secondary CPUs
+#else
+ .quad per_cpu__pda
+#endif
__FINITDATA
ENTRY(stack_start)
--- linux-2.6.tip.orig/arch/x86/kernel/setup_percpu.c
+++ linux-2.6.tip/arch/x86/kernel/setup_percpu.c
@@ -134,56 +134,8 @@ unsigned long __per_cpu_offset[NR_CPUS]
#endif
EXPORT_SYMBOL(__per_cpu_offset);
-#if !defined(CONFIG_SMP) || !defined(CONFIG_X86_64)
-static inline void setup_cpu_pda_map(void) { }
-
-#else /* CONFIG_SMP && CONFIG_X86_64 */
-
-/*
- * Allocate cpu_pda pointer table and array via alloc_bootmem.
- */
-static void __init setup_cpu_pda_map(void)
-{
- char *pda;
- struct x8664_pda **new_cpu_pda;
- unsigned long size;
- int cpu;
-
- size = roundup(sizeof(struct x8664_pda), cache_line_size());
-
- /* allocate cpu_pda array and pointer table */
- {
- unsigned long tsize = nr_cpu_ids * sizeof(void *);
- unsigned long asize = size * (nr_cpu_ids - 1);
-
- tsize = roundup(tsize, cache_line_size());
- new_cpu_pda = alloc_bootmem(tsize + asize);
- pda = (char *)new_cpu_pda + tsize;
- }
-
- /* initialize pointer table to static pda's */
- for_each_possible_cpu(cpu) {
- if (cpu == 0) {
- /* leave boot cpu pda in place */
- new_cpu_pda[0] = cpu_pda(0);
- DBG("cpu %4d pda %p\n", cpu, cpu_pda(0));
- continue;
- }
- DBG("cpu %4d pda %p\n", cpu, pda);
- new_cpu_pda[cpu] = (struct x8664_pda *)pda;
- new_cpu_pda[cpu]->in_bootmem = 1;
- pda += size;
- }
-
- /* point to new pointer table */
- _cpu_pda = new_cpu_pda;
-}
-#endif
-
/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
+ * Allocate and initialize the per cpu areas which include the PDAs.
*/
void __init setup_per_cpu_areas(void)
{
@@ -191,16 +143,11 @@ void __init setup_per_cpu_areas(void)
char *ptr;
int cpu;
- /* Setup cpu_pda map */
- setup_cpu_pda_map();
-
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size);
- DBG("PERCPU: __per_cpu_start %p\n", __per_cpu_start);
-
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = alloc_bootmem_pages(size);
@@ -215,26 +162,38 @@ void __init setup_per_cpu_areas(void)
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
- DBG("PERCPU: cpu %4d %p pda %p %p\n",
- cpu, ptr, _cpu_pda[cpu], cpu_pda(cpu));
-
/* Initialize each cpu's per_cpu area and save pointer */
memcpy(ptr, __per_cpu_load, __per_cpu_size);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
-#ifdef CONFIG_X86_64
- /* save for __my_cpu_offset() */
- cpu_pda(cpu)->data_offset = (unsigned long)ptr;
+ DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
+#ifdef CONFIG_X86_64
/*
- * The boot cpu gdt page must be reloaded as we moved it
- * from the static per cpu area to the newly allocated area.
+ * Note the boot cpu (0) has been using the static per_cpu load
+ * area for its pda. We need to zero out the pdas for the
+ * other cpus that are coming online.
+ *
+ * Additionally, for the boot cpu the gdt page must be reloaded
+ * as we moved it from the static per cpu area to the newly
+ * allocated area.
*/
- if (cpu == 0) {
- struct desc_ptr gdt_descr = early_gdt_descr;
-
- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
- native_load_gdt(&gdt_descr);
+ {
+ /* We rely on the fact that pda is the first element */
+ struct x8664_pda *pda = (struct x8664_pda *)ptr;
+
+ if (cpu) {
+ memset(pda, 0, sizeof(*pda));
+ pda->data_offset = (unsigned long)ptr;
+ } else {
+ struct desc_ptr gdt_descr = early_gdt_descr;
+
+ pda->data_offset = (unsigned long)ptr;
+ gdt_descr.address =
+ (unsigned long)get_cpu_gdt_table(0);
+ native_load_gdt(&gdt_descr);
+ pda_init(0);
+ }
}
#endif
}
--- linux-2.6.tip.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6.tip/arch/x86/kernel/smpboot.c
@@ -744,45 +744,6 @@ static void __cpuinit do_fork_idle(struc
complete(&c_idle->done);
}
-#ifdef CONFIG_X86_64
-/*
- * Allocate node local memory for the AP pda.
- *
- * Must be called after the _cpu_pda pointer table is initialized.
- */
-int __cpuinit get_local_pda(int cpu)
-{
- struct x8664_pda *oldpda, *newpda;
- unsigned long size = sizeof(struct x8664_pda);
- int node = cpu_to_node(cpu);
-
- if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
- return 0;
-
- oldpda = cpu_pda(cpu);
- newpda = kmalloc_node(size, GFP_ATOMIC, node);
- if (!newpda) {
- printk(KERN_ERR "Could not allocate node local PDA "
- "for CPU %d on node %d\n", cpu, node);
-
- if (oldpda)
- return 0; /* have a usable pda */
- else
- return -1;
- }
-
- if (oldpda) {
- memcpy(newpda, oldpda, size);
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, size);
- }
-
- newpda->in_bootmem = 0;
- cpu_pda(cpu) = newpda;
- return 0;
-}
-#endif /* CONFIG_X86_64 */
-
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -800,16 +761,6 @@ static int __cpuinit do_boot_cpu(int api
};
INIT_WORK(&c_idle.work, do_fork_idle);
-#ifdef CONFIG_X86_64
- /* Allocate node local memory for AP pdas */
- if (cpu > 0) {
- boot_error = get_local_pda(cpu);
- if (boot_error)
- goto restore_state;
- /* if can't get pda memory, can't start cpu */
- }
-#endif
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,6 +798,7 @@ do_rest:
#else
cpu_pda(cpu)->pcurrent = c_idle.idle;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+ initial_pda = (unsigned long)get_cpu_pda(cpu);
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
@@ -921,9 +873,6 @@ do_rest:
inquire_remote_apic(apicid);
}
}
-#ifdef CONFIG_X86_64
-restore_state:
-#endif
if (boot_error) {
/* Try to put things back the way they were before ... */
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
--- linux-2.6.tip.orig/arch/x86/xen/enlighten.c
+++ linux-2.6.tip/arch/x86/xen/enlighten.c
@@ -1748,8 +1748,18 @@ asmlinkage void __init xen_start_kernel(
#ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0;
+#if 0
+ /*
+ * FIXME: is the above still true?
+ * Also, x86_64_init_pda() has been removed...
+ * should anything replace it?
+ * (The offset for cpu_pda(0) is statically initialized
+ * to __per_cpu_load, while the remaining pda's come online
+ * in setup_per_cpu_areas().)
+ */
x86_64_init_pda();
#endif
+#endif
xen_smp_init();
--- linux-2.6.tip.orig/arch/x86/xen/smp.c
+++ linux-2.6.tip/arch/x86/xen/smp.c
@@ -285,13 +285,10 @@ static int __cpuinit xen_cpu_up(unsigned
#endif
#ifdef CONFIG_X86_64
- /* Allocate node local memory for AP pdas */
- WARN_ON(cpu == 0);
- if (cpu > 0) {
- rc = get_local_pda(cpu);
- if (rc)
- return rc;
- }
+ /*
+ * FIXME: I don't believe that calling get_local_pda() is
+ * required any more...?
+ */
#endif
#ifdef CONFIG_X86_32
--- linux-2.6.tip.orig/include/asm-x86/desc.h
+++ linux-2.6.tip/include/asm-x86/desc.h
@@ -41,6 +41,11 @@ static inline struct desc_struct *get_cp
#ifdef CONFIG_X86_64
+static inline struct x8664_pda *get_cpu_pda(unsigned int cpu)
+{
+ return &per_cpu(pda, cpu);
+}
+
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
--- linux-2.6.tip.orig/include/asm-x86/pda.h
+++ linux-2.6.tip/include/asm-x86/pda.h
@@ -37,10 +37,9 @@ struct x8664_pda {
unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
-extern struct x8664_pda **_cpu_pda;
extern void pda_init(int);
-#define cpu_pda(i) (_cpu_pda[i])
+#define cpu_pda(cpu) (&per_cpu(pda, cpu))
/*
* There is no fast way to get the base address of the PDA, all the accesses
--- linux-2.6.tip.orig/include/asm-x86/percpu.h
+++ linux-2.6.tip/include/asm-x86/percpu.h
@@ -3,20 +3,11 @@
#ifdef CONFIG_X86_64
#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
- in the PDA. Longer term the PDA and every per cpu variable
- should be just put into a single section and referenced directly
- from %gs */
-
-#ifdef CONFIG_SMP
#include <asm/pda.h>
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
+/* Same as asm-generic/percpu.h */
+#ifdef CONFIG_SMP
#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
#endif
#include <asm-generic/percpu.h>
--- linux-2.6.tip.orig/include/asm-x86/setup.h
+++ linux-2.6.tip/include/asm-x86/setup.h
@@ -92,7 +92,6 @@ extern unsigned long init_pg_tables_star
extern unsigned long init_pg_tables_end;
#else
-void __init x86_64_init_pda(void);
void __init x86_64_start_kernel(char *real_mode);
void __init x86_64_start_reservations(char *real_mode_data);
--- linux-2.6.tip.orig/include/asm-x86/smp.h
+++ linux-2.6.tip/include/asm-x86/smp.h
@@ -25,8 +25,6 @@ extern cpumask_t cpu_callin_map;
extern void (*mtrr_hook)(void);
extern void zap_low_mappings(void);
-extern int __cpuinit get_local_pda(int cpu);
-
extern int smp_num_siblings;
extern unsigned int num_processors;
extern cpumask_t cpu_initialized;
--- linux-2.6.tip.orig/include/asm-x86/trampoline.h
+++ linux-2.6.tip/include/asm-x86/trampoline.h
@@ -12,6 +12,7 @@ extern unsigned char *trampoline_base;
extern unsigned long init_rsp;
extern unsigned long initial_code;
+extern unsigned long initial_pda;
#define TRAMPOLINE_BASE 0x6000
extern unsigned long setup_trampoline(void);
--
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 4/4] x86_64: Reference zero-based percpu variables offset from gs
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
` (2 preceding siblings ...)
2008-07-25 21:11 ` [PATCH 3/4] x86_64: Fold pda into per cpu area Mike Travis
@ 2008-07-25 21:11 ` Mike Travis
2008-07-25 23:26 ` [PATCH 0/4] x86_64: Optimize percpu accesses Jeremy Fitzhardinge
` (2 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Mike Travis @ 2008-07-25 21:11 UTC (permalink / raw)
To: Ingo Molnar, Andrew Morton
Cc: Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel,
Christoph Lameter
[-- Attachment #1: zero_based_use_gs --]
[-- Type: text/plain, Size: 3411 bytes --]
* Now that %gs is pointing to the pda, it will then also point to the
per cpu variables and the __get_cpu_var() and __put_cpu_var() macros
can use:
%gs:[&per_cpu_xxxx - __per_cpu_start]
... and since __per_cpu_start == 0 then:
%gs:&per_cpu_var(xxx)
becomes the optimized effective address.
Since this is now a single instruction, we can remove the x86_64
non-preemptible versions of x86_read_percpu() and x86_write_percpu().
* Other cleanups in include/asm-x86/percpu.h
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Mike Travis <travis@sgi.com>
---
include/asm-x86/percpu.h | 62 +++++++++--------------------------------------
1 file changed, 13 insertions(+), 49 deletions(-)
--- linux-2.6.tip.orig/include/asm-x86/percpu.h
+++ linux-2.6.tip/include/asm-x86/percpu.h
@@ -5,41 +5,19 @@
#include <linux/compiler.h>
#include <asm/pda.h>
-/* Same as asm-generic/percpu.h */
+/* Same as asm-generic/percpu.h, except we use %gs as a segment offset. */
#ifdef CONFIG_SMP
#define __my_cpu_offset read_pda(data_offset)
+#define __percpu_seg "%%gs:"
+#else
+#define __percpu_seg ""
#endif
+
#include <asm-generic/percpu.h>
DECLARE_PER_CPU(struct x8664_pda, pda);
-/*
- * These are supposed to be implemented as a single instruction which
- * operates on the per-cpu data base segment. x86-64 doesn't have
- * that yet, so this is a fairly inefficient workaround for the
- * meantime. The single instruction is atomic with respect to
- * preemption and interrupts, so we need to explicitly disable
- * interrupts here to achieve the same effect. However, because it
- * can be used from within interrupt-disable/enable, we can't actually
- * disable interrupts; disabling preemption is enough.
- */
-#define x86_read_percpu(var) \
- ({ \
- typeof(per_cpu_var(var)) __tmp; \
- preempt_disable(); \
- __tmp = __get_cpu_var(var); \
- preempt_enable(); \
- __tmp; \
- })
-
-#define x86_write_percpu(var, val) \
- do { \
- preempt_disable(); \
- __get_cpu_var(var) = (val); \
- preempt_enable(); \
- } while(0)
-
-#else /* CONFIG_X86_64 */
+#else /* !CONFIG_X86_64 */
#ifdef __ASSEMBLY__
@@ -68,36 +46,23 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
#else /* ...!ASSEMBLY */
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
#ifdef CONFIG_SMP
-
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"
-
-#else /* !SMP */
-
+#else
#define __percpu_seg ""
-
-#endif /* SMP */
+#endif
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
@@ -232,7 +197,6 @@ do { \
percpu_cmpxchg_op(per_cpu_var(var), old, new)
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#ifdef CONFIG_SMP
--
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
` (3 preceding siblings ...)
2008-07-25 21:11 ` [PATCH 4/4] x86_64: Reference zero-based percpu variables offset from gs Mike Travis
@ 2008-07-25 23:26 ` Jeremy Fitzhardinge
2008-07-26 0:27 ` Mike Travis
2008-07-26 12:38 ` Ingo Molnar
2008-07-28 15:52 ` [crash] " Ingo Molnar
6 siblings, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-25 23:26 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Eric W. Biederman, Hugh Dickins,
Jack Steiner, H. Peter Anvin, linux-kernel
Mike Travis wrote:
> This patchset provides the following:
>
> * x86_64: Cleanup setup_percpu by fixing some minor potential
> problems as well as add some debugging aids.
>
> * x86_64: Rebase per cpu variables to zero
>
> Rebase per cpu variables to zero in preparation for the following
> patch to fold the pda into the per cpu area.
>
> * x86_64: Fold pda into per cpu area
>
> Declare the pda as a per cpu variable. This will allow the per cpu
> variables to be accessible on the x86_64 using %gs as the base of
> the percpu areas for each cpu:
>
> %gs:per_cpu_xxxx
>
> * x86_64: Reference zero-based percpu variables offset from gs
>
> Actually implement the above operation for __get_cpu_var() and
> __put_cpu_var(). Since this is now a single instruction, we
> can remove the non-preemptible versions of x86_read_percpu()
> and x86_write_percpu().
>
No, I think you've misunderstood these calls.
get_cpu_var(x) evaluates to an lvalue of this cpu's 'x'. It disables
preemption, in the same manner as get_cpu().
put_cpu_var(x) does nothing more than re-enable preemption, to pair with
get_cpu_var().
__get_cpu_var(x) is the same as get_cpu_var, but it assumes that
preemption is already disabled. There is no __put_cpu_var().
The important point is that an expression like "__get_cpu_var(x) = foo"
does not evaluate to a single instruction, and is not preempt- or
interrupt-atomic. That's the reason x86_X_percpu() exist, since
they're a single instruction in an asm. However, with %gs: based
addressing they can be easily unified.
J
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-25 23:26 ` [PATCH 0/4] x86_64: Optimize percpu accesses Jeremy Fitzhardinge
@ 2008-07-26 0:27 ` Mike Travis
2008-07-26 0:30 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 12+ messages in thread
From: Mike Travis @ 2008-07-26 0:27 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Andrew Morton, Eric W. Biederman, Hugh Dickins,
Jack Steiner, H. Peter Anvin, linux-kernel
Jeremy Fitzhardinge wrote:
> Mike Travis wrote:
>> This patchset provides the following:
>>
>> * x86_64: Cleanup setup_percpu by fixing some minor potential
>> problems as well as add some debugging aids.
>>
>> * x86_64: Rebase per cpu variables to zero
>>
>> Rebase per cpu variables to zero in preparation for the following
>> patch to fold the pda into the per cpu area.
>>
>> * x86_64: Fold pda into per cpu area
>>
>> Declare the pda as a per cpu variable. This will allow the per cpu
>> variables to be accessible on the x86_64 using %gs as the base of
>> the percpu areas for each cpu:
>>
>> %gs:per_cpu_xxxx
>>
>> * x86_64: Reference zero-based percpu variables offset from gs
>>
>> Actually implement the above operation for __get_cpu_var() and
>> __put_cpu_var(). Since this is now a single instruction, we
>> can remove the non-preemptible versions of x86_read_percpu()
>> and x86_write_percpu().
>>
>
> No, I think you've misunderstood these calls.
>
> get_cpu_var(x) evaluates to an lvalue of this cpu's 'x'. It disables
> preemption, in the same manner as get_cpu().
>
> put_cpu_var(x) does nothing more than re-enable preemption, to pair with
> get_cpu_var().
>
> __get_cpu_var(x) is the same as get_cpu_var, but it assumes that
> preemption is already disabled. There is no __put_cpu_var().
>
> The important point is that an expression like "__get_cpu_var(x) = foo"
> does not evaluate to a single instruction, and is not preempt- or
> interrupt-atomic. That's the reason x86_X_percpu() exist, since
> they're a single instruction in an asm. However, with %gs: based
> addressing they can be easily unified.
>
> J
Yes, you're right, I wrote that quickly without really reading it back.
My point is that now that x86_read_percpu() and x86_write_percpu() do
evaluate to a single instruction (by definition atomic), then it doesn't
need to be surrounded by the preempt_disable()/preempt_enable() calls.
It appears as if I'm implying that's the case for get/put_cpu_var().
Thanks,
Mike
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-26 0:27 ` Mike Travis
@ 2008-07-26 0:30 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-26 0:30 UTC (permalink / raw)
To: Mike Travis
Cc: Ingo Molnar, Andrew Morton, Eric W. Biederman, Hugh Dickins,
Jack Steiner, H. Peter Anvin, linux-kernel
Mike Travis wrote:
> Yes, you're right, I wrote that quickly without really reading it back.
> My point is that now that x86_read_percpu() and x86_write_percpu() do
> evaluate to a single instruction (by definition atomic), then it doesn't
> need to be surrounded by the preempt_disable()/preempt_enable() calls.
>
Yep, correct.
> It appears as if I'm implying that's the case for get/put_cpu_var().
>
Right.
J
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
` (4 preceding siblings ...)
2008-07-25 23:26 ` [PATCH 0/4] x86_64: Optimize percpu accesses Jeremy Fitzhardinge
@ 2008-07-26 12:38 ` Ingo Molnar
2008-07-28 18:33 ` Mike Travis
2008-07-28 15:52 ` [crash] " Ingo Molnar
6 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2008-07-26 12:38 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel
* Mike Travis <travis@sgi.com> wrote:
> This patchset provides the following:
>
> * x86_64: Cleanup setup_percpu by fixing some minor potential
> problems as well as add some debugging aids.
>
> * x86_64: Rebase per cpu variables to zero
>
> Rebase per cpu variables to zero in preparation for the following
> patch to fold the pda into the per cpu area.
>
> * x86_64: Fold pda into per cpu area
>
> Declare the pda as a per cpu variable. This will allow the per cpu
> variables to be accessible on the x86_64 using %gs as the base of
> the percpu areas for each cpu:
>
> %gs:per_cpu_xxxx
>
> * x86_64: Reference zero-based percpu variables offset from gs
>
> Actually implement the above operation for __get_cpu_var() and
> __put_cpu_var(). Since this is now a single instruction, we
> can remove the non-preemptible versions of x86_read_percpu()
> and x86_write_percpu().
>
> Note that the following changes are NOT in this patchset as the plan now
> seems to be that the common (to x86) variables that are in the pda should
> be made individual per cpu variables, leaving only the stack canary in place.
>
> * x86_64: Replace cpu_pda ops with percpu ops
> * x86_64: Replace xxx_pda() operations with x86_xxx_percpu().
> * x86_64: Remove xxx_pda() operations
> * x86_64: Remove cpu_pda() macro
>
> Based on linux-2.6.tip/master.
i've added these patches to tip/x86/percpu-zerobased, but not yet merged
into tip/master. I've made it -git based - does this patchset have any
functional dependencies on other patches?
Ingo
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-26 12:38 ` Ingo Molnar
@ 2008-07-28 18:33 ` Mike Travis
0 siblings, 0 replies; 12+ messages in thread
From: Mike Travis @ 2008-07-28 18:33 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel
Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
>
>> This patchset provides the following:
>>
>> * x86_64: Cleanup setup_percpu by fixing some minor potential
>> problems as well as add some debugging aids.
>>
>> * x86_64: Rebase per cpu variables to zero
>>
>> Rebase per cpu variables to zero in preparation for the following
>> patch to fold the pda into the per cpu area.
>>
>> * x86_64: Fold pda into per cpu area
>>
>> Declare the pda as a per cpu variable. This will allow the per cpu
>> variables to be accessible on the x86_64 using %gs as the base of
>> the percpu areas for each cpu:
>>
>> %gs:per_cpu_xxxx
>>
>> * x86_64: Reference zero-based percpu variables offset from gs
>>
>> Actually implement the above operation for __get_cpu_var() and
>> __put_cpu_var(). Since this is now a single instruction, we
>> can remove the non-preemptible versions of x86_read_percpu()
>> and x86_write_percpu().
>>
>> Note that the following changes are NOT in this patchset as the plan now
>> seems to be that the common (to x86) variables that are in the pda should
>> be made individual per cpu variables, leaving only the stack canary in place.
>>
>> * x86_64: Replace cpu_pda ops with percpu ops
>> * x86_64: Replace xxx_pda() operations with x86_xxx_percpu().
>> * x86_64: Remove xxx_pda() operations
>> * x86_64: Remove cpu_pda() macro
>>
>> Based on linux-2.6.tip/master.
>
> i've added these patches to tip/x86/percpu-zerobased, but not yet merged
> into tip/master. I've made it -git based - does this patchset have any
> functional dependencies on other patches?
>
> Ingo
I think the other patches have been in place for a while. This was actually
patch 3 of about 20 that finalized with the CPU_ALLOC changes. In my tree
the 2 prior to this one are:
b3a0cb456d848e10b2f7b371ba05e44f1384520a
Subject: Zero based percpu: Infrastructure to rebase the per cpu area to zero
d3794979a8a80c222ce9d016a6dfc4bed36965d0
Subject: x86: Extend percpu ops to 64 bit
Thanks,
Mike
^ permalink raw reply [flat|nested] 12+ messages in thread
* [crash] Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-25 21:11 [PATCH 0/4] x86_64: Optimize percpu accesses Mike Travis
` (5 preceding siblings ...)
2008-07-26 12:38 ` Ingo Molnar
@ 2008-07-28 15:52 ` Ingo Molnar
2008-07-28 19:39 ` Mike Travis
6 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2008-07-28 15:52 UTC (permalink / raw)
To: Mike Travis
Cc: Andrew Morton, Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel,
Thomas Gleixner
ok, i have integrated tip/x86/percpu-zerobased into tip/master briefly,
but it blew up almost immediately in testing, on two boxes.
one bad config is:
http://redhat.com/~mingo/misc/config-Mon_Jul_28_17_35_00_CEST_2008.bad
failure pattern: it booted up fine to userspace and seemed functional, but
then produced a spontaneous reboot while building a kernel, without any
log entries.
other bad config is:
http://redhat.com/~mingo/misc/config-Mon_Jul_28_17_30_39_CEST_2008.bad
failure pattern: early crash at:
PANIC: early exception 0e rip 10:ffffffff817dfc1a error 0 cr2 28
which corresponds to:
ffffffff817dfc0f <machine_specific_memory_setup>:
ffffffff817dfc0f: 48 8b 05 aa cf 04 00 mov 315306(%rip),%rax
# ffffffff8182cbc0 <x86_quirks>
ffffffff817dfc16: 55 push %rbp
ffffffff817dfc17: 48 89 e5 mov %rsp,%rbp
ffffffff817dfc1a: 48 8b 40 28 mov 0x28(%rax),%rax [*]
ffffffff817dfc1e: 48 85 c0 test %rax,%rax
i.e. RAX was zero.
i've pushed out the tip/tmp.x86/percpu-zerobased.bad branch which shows
the exact kernel that failed. It was generated by:
git-checkout tip/master
git-merge tip/x86/percpu-zerobased
Ingo
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [crash] Re: [PATCH 0/4] x86_64: Optimize percpu accesses
2008-07-28 15:52 ` [crash] " Ingo Molnar
@ 2008-07-28 19:39 ` Mike Travis
0 siblings, 0 replies; 12+ messages in thread
From: Mike Travis @ 2008-07-28 19:39 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andrew Morton, Eric W. Biederman, Hugh Dickins, Jack Steiner,
Jeremy Fitzhardinge, H. Peter Anvin, linux-kernel,
Thomas Gleixner
Ingo Molnar wrote:
> ok, i have integrated tip/x86/percpu-zerobased into tip/master briefly,
> but it blew up almost immediately in testing, on two boxes.
>
> one bad config is:
>
> http://redhat.com/~mingo/misc/config-Mon_Jul_28_17_35_00_CEST_2008.bad
>
> failure pattern: it booted up fine to userspace and seemed functional, but
> then produced a spontaneous reboot while building a kernel, without any
> log entries.
>
> other bad config is:
>
> http://redhat.com/~mingo/misc/config-Mon_Jul_28_17_30_39_CEST_2008.bad
>
> failure pattern: early crash at:
>
> PANIC: early exception 0e rip 10:ffffffff817dfc1a error 0 cr2 28
>
> which corresponds to:
>
> ffffffff817dfc0f <machine_specific_memory_setup>:
> ffffffff817dfc0f: 48 8b 05 aa cf 04 00 mov 315306(%rip),%rax
> # ffffffff8182cbc0 <x86_quirks>
> ffffffff817dfc16: 55 push %rbp
> ffffffff817dfc17: 48 89 e5 mov %rsp,%rbp
> ffffffff817dfc1a: 48 8b 40 28 mov 0x28(%rax),%rax [*]
> ffffffff817dfc1e: 48 85 c0 test %rax,%rax
>
> i.e. RAX was zero.
>
> i've pushed out the tip/tmp.x86/percpu-zerobased.bad branch which shows
> the exact kernel that failed. It was generated by:
>
> git-checkout tip/master
> git-merge tip/x86/percpu-zerobased
>
> Ingo
Ok, thanks, I'll take a look. There were some questions that I had
(and I should have RFC'd the patch since there are still questions.)
Mike
^ permalink raw reply [flat|nested] 12+ messages in thread