* PDA changes (take 3)
@ 2009-01-03 4:22 Brian Gerst
From: Brian Gerst @ 2009-01-03 4:22 UTC (permalink / raw)
To: H. Peter Anvin; +Cc: Ingo Molnar, linux-kernel, the arch/x86 maintainers
On Fri, Jan 2, 2009 at 3:11 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Brian Gerst wrote:
>> Here are the PDA patches again. I changed the first patch so that the PDA is cacheline
>> aligned. I dropped the cpunumber patch for now, and added conversions of the TLB state
>> and IRQ stats to match the 32-bit code. Stats for defconfig:
>>
>> text data bss dec hex filename
>> 7033649 1754476 758508 9546633 91ab89 vmlinux.before
>> 7029643 1754716 758508 9542867 919cd3 vmlinux.after
>>
>> Patches are against 2.6.28.
>
> Hi there,
>
> I just tried to apply your patchset, but it fails rather dramatically on
> patch 3. Could you refresh it against tip:master or current Linus git?
It was a conflict with commit 915b0d0104b72fd36af088ba4b11b5690bc96a6c
(x86: hardirq: introduce inc_irq_stat()). Resending the series
rebased against current Linus git.
--
Brian Gerst
^ permalink raw reply [flat|nested] 6+ messages in thread* [PATCH 1/4] x86-64: Convert the PDA to percpu. 2009-01-03 4:22 PDA changes (take 3) Brian Gerst @ 2009-01-03 4:23 ` Brian Gerst 2009-01-03 4:23 ` [PATCH 2/4] x86-64: Unify x86_*_percpu() functions Brian Gerst 0 siblings, 1 reply; 6+ messages in thread From: Brian Gerst @ 2009-01-03 4:23 UTC (permalink / raw) To: H. Peter Anvin Cc: Ingo Molnar, linux-kernel, the arch/x86 maintainers, Brian Gerst This patch makes the PDA a normal per-cpu variable, allowing the removal of the special allocator code. %gs still points to the base of the PDA. Tested on a dual-core AMD64 system. Signed-off-by: Brian Gerst <brgerst@gmail.com> --- arch/x86/include/asm/pda.h | 4 -- arch/x86/include/asm/percpu.h | 3 -- arch/x86/include/asm/setup.h | 1 - arch/x86/kernel/cpu/common.c | 6 ++-- arch/x86/kernel/dumpstack_64.c | 8 ++-- arch/x86/kernel/head64.c | 23 +------------ arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/setup_percpu.c | 70 ++++++++-------------------------------- arch/x86/kernel/smpboot.c | 58 +-------------------------------- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/smp.c | 12 +------ 12 files changed, 27 insertions(+), 164 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2fbfff8..60e8d91 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -23,7 +23,6 @@ struct x8664_pda { #endif char *irqstackptr; short nodenumber; /* number of current node (32k max) */ - short in_bootmem; /* pda lives in bootmem */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; @@ -39,11 +38,8 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda **_cpu_pda; extern void pda_init(int); -#define cpu_pda(i) (_cpu_pda[i]) - /* * There is no fast way to get the base address of the PDA, all the accesses * have to mention %fs/%gs. So it needs to be done this Torvaldian way. 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece7205..6f866fd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -12,11 +12,8 @@ #ifdef CONFIG_SMP #include <asm/pda.h> -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset read_pda(data_offset) -#define per_cpu_offset(x) (__per_cpu_offset(x)) - #endif #include <asm-generic/percpu.h> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 4fcd53f..2f3e50e 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 42e0853..d039178 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -859,8 +859,8 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); +DEFINE_PER_CPU_SHARED_ALIGNED(struct x8664_pda, pda); +EXPORT_PER_CPU_SYMBOL(pda); struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; @@ -868,7 +868,7 @@ static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; void __cpuinit pda_init(int cpu) { - struct x8664_pda *pda = cpu_pda(cpu); + struct x8664_pda *pda = &per_cpu(pda, cpu); /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d07..23e13e7 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -106,7 +106,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long *)per_cpu(pda, cpu).irqstackptr; unsigned used = 0; struct thread_info *tinfo; int graph = 0; @@ -200,9 +200,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, int i; const int cpu = smp_processor_id(); unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); + (unsigned long *) (per_cpu(pda, cpu).irqstackptr); unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + (unsigned long *) (per_cpu(pda, cpu).irqstackptr - IRQSTACKSIZE); /* * debugging aid: "show_stack(NULL, NULL);" prints the @@ -241,7 +241,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = per_cpu(pda, cpu).pcurrent; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 388e05a..af67d32 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,27 +26,6 @@ #include <asm/bios_ebda.h> #include <asm/trampoline.h> -/* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda __read_mostly; - -#ifdef CONFIG_SMP -/* - * We install an empty cpu_pda pointer table to indicate to early users - * (numa_set_node) that the cpu_pda pointer table for cpus other than - * the boot cpu is not yet setup. 
- */ -static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; -#else -static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; -#endif - -void __init x86_64_init_pda(void) -{ - _cpu_pda = __cpu_pda; - cpu_pda(0) = &_boot_cpu_pda; - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -112,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); + pda_init(0); x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index bce53e1..90f87fd 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -39,7 +39,7 @@ void ack_bad_irq(unsigned int irq) #ifdef CONFIG_X86_32 # define irq_stats(x) (&per_cpu(irq_stat, x)) #else -# define irq_stats(x) cpu_pda(x) +# define irq_stats(x) (&per_cpu(pda, x)) #endif /* * /proc/interrupts printing: diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8bd1bf9..235672f 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -64,7 +64,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { #ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; + return per_cpu(pda, cpu).__nmi_count; #else return nmi_count(cpu); #endif diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0b63b08..f27e7e7 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -80,58 +80,8 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way - */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. 
- */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - struct x8664_pda **new_cpu_pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long tsize = nr_cpu_ids * sizeof(void *); - unsigned long asize = size * (nr_cpu_ids - 1); - - tsize = roundup(tsize, cache_line_size()); - new_cpu_pda = alloc_bootmem(tsize + asize); - pda = (char *)new_cpu_pda + tsize; - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - new_cpu_pda[0] = cpu_pda(0); - continue; - } - new_cpu_pda[cpu] = (struct x8664_pda *)pda; - new_cpu_pda[cpu]->in_bootmem = 1; - pda += size; - } - - /* point to new pointer table */ - _cpu_pda = new_cpu_pda; -} -#endif /* * Great future plan: @@ -145,9 +95,6 @@ void __init setup_per_cpu_areas(void) int cpu; unsigned long align = 1; - /* Setup cpu_pda map */ - setup_cpu_pda_map(); - /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); @@ -187,10 +134,21 @@ void __init setup_per_cpu_areas(void) cpu, node, __pa(ptr)); } #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + __per_cpu_offset[cpu] = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); +#ifdef CONFIG_X86_64 + if (cpu) + memset(&per_cpu(pda, cpu), 0, sizeof(struct x8664_pda)); + per_cpu(pda, cpu).data_offset = __per_cpu_offset[cpu]; +#endif } +#ifdef CONFIG_X86_64 + mb(); + wrmsrl(MSR_GS_BASE, &per_cpu(pda, 0)); + mb(); +#endif + /* Setup percpu data maps */ setup_per_cpu_maps(); @@ -234,8 +192,8 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - if (cpu_pda(cpu) && node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; + if (node != NUMA_NO_NODE) + per_cpu(pda, cpu).nodenumber = node; if (cpu_to_node_map) cpu_to_node_map[cpu] = node; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 31869bf..e50fea9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. 
- */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -852,7 +796,7 @@ do_rest: /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; + per_cpu(pda, cpu).pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea2152..76e092d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1652,7 +1652,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - x86_64_init_pda(); + pda_init(0); #endif xen_smp_init(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c44e206..0d8d19e 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -283,22 +283,12 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - WARN_ON(cpu == 0); - if (cpu > 0) { - rc = get_local_pda(cpu); - if (rc) - return rc; - } -#endif - #ifdef CONFIG_X86_32 init_gdt(cpu); per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = idle; + per_cpu(pda, cpu).pcurrent = idle; clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); -- 1.6.1.rc1 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/4] x86-64: Unify x86_*_percpu() functions. 2009-01-03 4:23 ` [PATCH 1/4] x86-64: Convert the PDA to percpu Brian Gerst @ 2009-01-03 4:23 ` Brian Gerst 2009-01-03 4:23 ` [PATCH 3/4] x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit Brian Gerst 0 siblings, 1 reply; 6+ messages in thread From: Brian Gerst @ 2009-01-03 4:23 UTC (permalink / raw) To: H. Peter Anvin Cc: Ingo Molnar, linux-kernel, the arch/x86 maintainers, Brian Gerst Merge the 32-bit and 64-bit versions of these functions. Unlike 32-bit, the segment base is the current cpu's PDA instead of the offset from the original per-cpu area. This is because GCC hardcodes the stackprotector canary at %gs:40. Since the assembler is incapable of relocating against multiple symbols, the code ends up looking like: movq $per_cpu__var, reg subq $per_cpu__pda, reg movq %gs:(reg), reg This is still atomic since the offset is a constant (just calculated at runtime) and not dependant on the cpu number. Signed-off-by: Brian Gerst <brgerst@gmail.com> --- arch/x86/include/asm/percpu.h | 92 +++++++++++++++++----------------------- 1 files changed, 39 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 6f866fd..f704243 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,54 +1,9 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H -#ifdef CONFIG_X86_64 -#include <linux/compiler.h> - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include <asm/pda.h> - -#define __my_cpu_offset read_pda(data_offset) - -#endif -#include <asm-generic/percpu.h> - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ - #ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_32 /* * PER_CPU finds an address of a per-cpu variable. 
* @@ -72,6 +27,8 @@ DECLARE_PER_CPU(struct x8664_pda, pda); #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#endif /* X86_32 */ + #else /* ...!ASSEMBLY */ /* @@ -88,19 +45,37 @@ DECLARE_PER_CPU(struct x8664_pda, pda); */ #ifdef CONFIG_SMP +#ifdef CONFIG_X86_32 + #define __my_cpu_offset x86_read_percpu(this_cpu_off) /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ #define __percpu_seg "%%fs:" +#define __percpu_seg_off(x) (x) + +#else + +#define __my_cpu_offset read_pda(data_offset) + +#define __percpu_seg "%%gs:" +#define __percpu_seg_off(x) RELOC_HIDE((x), -(unsigned long)&per_cpu__pda) + +#endif #else /* !SMP */ #define __percpu_seg "" +#define __percpu_seg_off(x) (x) #endif /* SMP */ #include <asm-generic/percpu.h> +#ifdef CONFIG_X86_64 +#include <asm/pda.h> +DECLARE_PER_CPU(struct x8664_pda, pda); +#endif + /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); @@ -111,6 +86,7 @@ extern void __bad_percpu_size(void); #define percpu_to_op(op, var, val) \ do { \ typedef typeof(var) T__; \ + typeof(var) *var__ = __percpu_seg_off(&var); \ if (0) { \ T__ tmp__; \ tmp__ = (val); \ @@ -118,17 +94,22 @@ do { \ switch (sizeof(var)) { \ case 1: \ asm(op "b %1,"__percpu_seg"%0" \ - : "+m" (var) \ + : "+m" (*var__) \ : "ri" ((T__)val)); \ break; \ case 2: \ asm(op "w %1,"__percpu_seg"%0" \ - : "+m" (var) \ + : "+m" (*var__) \ : "ri" ((T__)val)); \ break; \ case 4: \ asm(op "l %1,"__percpu_seg"%0" \ - : "+m" (var) \ + : "+m" (*var__) \ + : "ri" ((T__)val)); \ + break; \ + case 8: \ + asm(op "q %1,"__percpu_seg"%0" \ + : "+m" (*var__) \ : "ri" ((T__)val)); \ break; \ default: __bad_percpu_size(); \ @@ -138,21 +119,27 @@ do { \ #define percpu_from_op(op, var) \ ({ \ typeof(var) ret__; \ + typeof(var) *var__ = __percpu_seg_off(&var); \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_seg"%1,%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : "m" (*var__)); \ break; \ case 2: \ asm(op "w "__percpu_seg"%1,%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : "m" (*var__)); \ break; \ case 4: \ asm(op "l "__percpu_seg"%1,%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : "m" (*var__)); \ + break; \ + case 8: \ + asm(op "q "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (*var__)); \ break; \ default: __bad_percpu_size(); \ } \ @@ -165,7 +152,6 @@ do { \ #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP -- 1.6.1.rc1 ^ permalink raw reply related [flat|nested] 6+ messages in thread
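The displacement trick patch 2 describes can be modelled in plain C. Again this is a sketch with invented names (pda_model, percpu_area, some_counter), not the kernel macros: the offset of a per-cpu variable relative to the pda is a constant, and adding it to this CPU's base (the value held in MSR_GS_BASE) reaches the right copy in a single base-plus-displacement access, which is why the %gs-prefixed mov stays atomic with respect to preemption.

#include <stdio.h>
#include <stddef.h>

struct pda_model { unsigned long data_offset; };

/* the per-cpu area: the pda plus an ordinary per-cpu variable */
struct percpu_area {
	struct pda_model pda;
	unsigned long some_counter;
};

static struct percpu_area cpu_area[2];	/* one copy per CPU */

/* rough model of the 64-bit x86_read_percpu() after this patch */
static unsigned long model_read_counter(int cpu)
{
	/* constant displacement: &per_cpu__some_counter - &per_cpu__pda */
	long disp = (long)offsetof(struct percpu_area, some_counter)
		  - (long)offsetof(struct percpu_area, pda);
	/* "segment base": this CPU's pda, i.e. what MSR_GS_BASE holds */
	char *gs_base = (char *)&cpu_area[cpu].pda;

	/* the kernel does this in one gs-prefixed instruction */
	return *(unsigned long *)(gs_base + disp);
}

int main(void)
{
	cpu_area[0].some_counter = 10;
	cpu_area[1].some_counter = 20;
	printf("cpu0=%lu cpu1=%lu\n",
	       model_read_counter(0), model_read_counter(1));
	return 0;
}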
* [PATCH 3/4] x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit. 2009-01-03 4:23 ` [PATCH 2/4] x86-64: Unify x86_*_percpu() functions Brian Gerst @ 2009-01-03 4:23 ` Brian Gerst 2009-01-03 4:23 ` [PATCH 4/4] x86-64: Move TLB state " Brian Gerst 0 siblings, 1 reply; 6+ messages in thread From: Brian Gerst @ 2009-01-03 4:23 UTC (permalink / raw) To: H. Peter Anvin Cc: Ingo Molnar, linux-kernel, the arch/x86 maintainers, Brian Gerst Signed-off-by: Brian Gerst <brgerst@gmail.com> --- arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++----- arch/x86/include/asm/pda.h | 10 ---------- arch/x86/kernel/irq.c | 6 +----- arch/x86/kernel/irq_64.c | 3 +++ arch/x86/kernel/nmi.c | 10 +--------- arch/x86/xen/smp.c | 18 +++--------------- 6 files changed, 27 insertions(+), 44 deletions(-) diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index b5a6b5d..213df9a 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,22 +3,36 @@ #include <linux/threads.h> #include <linux/irq.h> -#include <asm/pda.h> #include <asm/apic.h> +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT 1 -#define inc_irq_stat(member) add_pda(member, 1) +#define inc_irq_stat(member) x86_add_percpu(irq_stat.member, 1) -#define local_softirq_pending() read_pda(__softirq_pending) +#define local_softirq_pending() x86_read_percpu(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +#define set_softirq_pending(x) x86_write_percpu(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) x86_or_percpu(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 60e8d91..97a95fa 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -23,19 +23,9 @@ struct x8664_pda { #endif char *irqstackptr; short nodenumber; /* number of current node (32k max) */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; short isidle; struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; extern void pda_init(int); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 90f87fd..4be7ebf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) (&per_cpu(pda, x)) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 
6383d50..b98fd64 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -19,6 +19,9 @@ #include <asm/idle.h> #include <asm/smp.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + /* * Probabilistic stack overflow check: * diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 235672f..1872967 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -63,11 +63,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return per_cpu(pda, cpu).__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -84,12 +80,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 0d8d19e..9d3865d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); return IRQ_HANDLED; } @@ -435,11 +431,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; @@ -449,11 +441,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; -- 1.6.1.rc1 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/4] x86-64: Move TLB state from PDA to per-cpu and consolidate with 32-bit. 2009-01-03 4:23 ` [PATCH 3/4] x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit Brian Gerst @ 2009-01-03 4:23 ` Brian Gerst 0 siblings, 0 replies; 6+ messages in thread From: Brian Gerst @ 2009-01-03 4:23 UTC (permalink / raw) To: H. Peter Anvin Cc: Ingo Molnar, linux-kernel, the arch/x86 maintainers, Brian Gerst Signed-off-by: Brian Gerst <brgerst@gmail.com> --- arch/x86/include/asm/mmu_context_64.h | 14 +++++++------- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/tlbflush.h | 7 ++----- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/tlb_32.c | 12 ++---------- arch/x86/kernel/tlb_64.c | 13 ++++++++----- arch/x86/xen/mmu.c | 6 +----- 7 files changed, 20 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h index 677d36e..8fb6060 100644 --- a/arch/x86/include/asm/mmu_context_64.h +++ b/arch/x86/include/asm/mmu_context_64.h @@ -6,8 +6,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); + if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) + x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); + x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); + x86_write_percpu(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); load_cr3(next->pgd); @@ -30,9 +30,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #ifdef CONFIG_SMP else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); + x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. 
We must reload CR3 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 97a95fa..bc3b719 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -23,9 +23,7 @@ struct x8664_pda { #endif char *irqstackptr; short nodenumber; /* number of current node (32k max) */ - short mmu_state; short isidle; - struct mm_struct *active_mm; } ____cacheline_aligned_in_smp; extern void pda_init(int); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb5..b344098 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -148,20 +148,17 @@ void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + x86_write_percpu(cpu_tlbstate.state, 0); + x86_write_percpu(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d039178..2a696d1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -882,8 +882,6 @@ void __cpuinit pda_init(int cpu) pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ce50546..15833ae 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -4,8 +4,8 @@ #include <asm/tlbflush.h> -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; /* must come after the send_IPI functions above for inlining */ #include <mach_ipi.h> @@ -246,11 +246,3 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} - diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1..3bcb78d 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -18,6 +18,9 @@ #include <asm/uv/uv_hub.h> #include <asm/uv/uv_bau.h> +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + #include <mach_ipi.h> /* * Smarter SMP flushing macros. 
@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { + if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -274,7 +277,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 503c240..0d9ed77 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; -#ifdef CONFIG_X86_64 - active_mm = read_pda(active_mm); -#else - active_mm = __get_cpu_var(cpu_tlbstate).active_mm; -#endif + active_mm = x86_read_percpu(cpu_tlbstate.active_mm); if (active_mm == mm) leave_mm(smp_processor_id()); -- 1.6.1.rc1 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* PDA changes (take 2, resend) @ 2009-01-01 0:13 Brian Gerst 2009-01-01 0:13 ` [PATCH 2/4] x86-64: Unify x86_*_percpu() functions Brian Gerst 0 siblings, 1 reply; 6+ messages in thread From: Brian Gerst @ 2009-01-01 0:13 UTC (permalink / raw) To: Ingo Molnar; +Cc: linux-kernel Here are the PDA patches again. I changed the first patch so that the PDA is cacheline aligned. I dropped the cpunumber patch for now, and added conversions of the TLB state and IRQ stats to match the 32-bit code. Stats for defconfig: text data bss dec hex filename 7033649 1754476 758508 9546633 91ab89 vmlinux.before 7029643 1754716 758508 9542867 919cd3 vmlinux.after Patches are against 2.6.28. ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/4] x86-64: Unify x86_*_percpu() functions. 2009-01-01 0:13 PDA changes (take 2, resend) Brian Gerst @ 2009-01-01 0:13 ` Brian Gerst 0 siblings, 0 replies; 6+ messages in thread From: Brian Gerst @ 2009-01-01 0:13 UTC (permalink / raw) To: Ingo Molnar; +Cc: linux-kernel, Brian Gerst Merge the 32-bit and 64-bit versions of these functions. Unlike 32-bit, the segment base is the current cpu's PDA instead of the offset from the original per-cpu area. This is because GCC hardcodes the stackprotector canary at %gs:40. Since the assembler is incapable of relocating against multiple symbols, the code ends up looking like: movq $per_cpu__var, reg subq $per_cpu__pda, reg movq %gs:(reg), reg This is still atomic since the offset is a constant (just calculated at runtime) and not dependant on the cpu number. Signed-off-by: Brian Gerst <brgerst@gmail.com> --- arch/x86/include/asm/percpu.h | 92 +++++++++++++++++----------------------- 1 files changed, 39 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 6f866fd..f704243 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,54 +1,9 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H -#ifdef CONFIG_X86_64 -#include <linux/compiler.h> - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include <asm/pda.h> - -#define __my_cpu_offset read_pda(data_offset) - -#endif -#include <asm-generic/percpu.h> - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ - #ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_32 /* * PER_CPU finds an address of a per-cpu variable. 
* @@ -72,6 +27,8 @@ DECLARE_PER_CPU(struct x8664_pda, pda); #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#endif /* X86_32 */ + #else /* ...!ASSEMBLY */ /* @@ -88,19 +45,37 @@ DECLARE_PER_CPU(struct x8664_pda, pda); */ #ifdef CONFIG_SMP +#ifdef CONFIG_X86_32 + #define __my_cpu_offset x86_read_percpu(this_cpu_off) /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ #define __percpu_seg "%%fs:" +#define __percpu_seg_off(x) (x) + +#else + +#define __my_cpu_offset read_pda(data_offset) + +#define __percpu_seg "%%gs:" +#define __percpu_seg_off(x) RELOC_HIDE((x), -(unsigned long)&per_cpu__pda) + +#endif #else /* !SMP */ #define __percpu_seg "" +#define __percpu_seg_off(x) (x) #endif /* SMP */ #include <asm-generic/percpu.h> +#ifdef CONFIG_X86_64 +#include <asm/pda.h> +DECLARE_PER_CPU(struct x8664_pda, pda); +#endif + /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); @@ -111,6 +86,7 @@ extern void __bad_percpu_size(void); #define percpu_to_op(op, var, val) \ do { \ typedef typeof(var) T__; \ + typeof(var) *var__ = __percpu_seg_off(&var); \ if (0) { \ T__ tmp__; \ tmp__ = (val); \ @@ -118,17 +94,22 @@ do { \ switch (sizeof(var)) { \ case 1: \ asm(op "b %1,"__percpu_seg"%0" \ - : "+m" (var) \ + : "+m" (*var__) \ : "ri" ((T__)val)); \ break; \ case 2: \ asm(op "w %1,"__percpu_seg"%0" \ - : "+m" (var) \ + : "+m" (*var__) \ : "ri" ((T__)val)); \ break; \ case 4: \ asm(op "l %1,"__percpu_seg"%0" \ - : "+m" (var) \ + : "+m" (*var__) \ + : "ri" ((T__)val)); \ + break; \ + case 8: \ + asm(op "q %1,"__percpu_seg"%0" \ + : "+m" (*var__) \ : "ri" ((T__)val)); \ break; \ default: __bad_percpu_size(); \ @@ -138,21 +119,27 @@ do { \ #define percpu_from_op(op, var) \ ({ \ typeof(var) ret__; \ + typeof(var) *var__ = __percpu_seg_off(&var); \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_seg"%1,%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : "m" (*var__)); \ break; \ case 2: \ asm(op "w "__percpu_seg"%1,%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : "m" (*var__)); \ break; \ case 4: \ asm(op "l "__percpu_seg"%1,%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : "m" (*var__)); \ + break; \ + case 8: \ + asm(op "q "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (*var__)); \ break; \ default: __bad_percpu_size(); \ } \ @@ -165,7 +152,6 @@ do { \ #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP -- 1.6.1.rc1 ^ permalink raw reply related [flat|nested] 6+ messages in thread