From: travis@sgi.com
To: Andrew Morton <akpm@linux-foundation.org>,
Andi Kleen <ak@suse.de>,
mingo@elte.hu
Cc: Christoph Lameter <clameter@sgi.com>,
jeremy@goop.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 2/3] x86_64: Fold pda into per cpu area
Date: Tue, 22 Jan 2008 20:49:26 -0800 [thread overview]
Message-ID: <20080123044924.926408000@sgi.com> (raw)
In-Reply-To: 20080123044924.508382000@sgi.com
[-- Attachment #1: x86_64_fold_pda --]
[-- Type: text/plain, Size: 6984 bytes --]
* Declare the pda as a per cpu variable. This moves the pda to an
address accessible by the x86_64 per cpu macros. The pda is placed
first in the per cpu area (section .data.percpu.first), so subtracting
__per_cpu_start from a variable's address yields its offset from the
beginning of the per cpu area. Since %gs points to the pda, it then
also points to the start of the per cpu area, and per cpu variables
can be accessed as follows:
%gs:[&per_cpu_xxxx - __per_cpu_start]
* The boot_pdas are only needed in head64.c so move the declaration
over there and make it static.
* Remove the code that allocates special pda data structures.
Based on 2.6.24-rc8-mm1
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
---
arch/x86/kernel/head64.c | 6 ++++++
arch/x86/kernel/setup64.c | 12 ++++++++++--
arch/x86/kernel/smpboot_64.c | 16 ----------------
include/asm-generic/vmlinux.lds.h | 1 +
include/asm-x86/pda.h | 1 -
include/asm-x86/percpu.h | 30 +++++++++++++++++++-----------
include/linux/percpu.h | 13 ++++++++++++-
7 files changed, 48 insertions(+), 31 deletions(-)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -22,6 +22,12 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
+/*
+ * Only used before the per cpu areas are set up. Non-possible cpus
+ * continue to use these entries after boot.
+ */
+static struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -34,7 +34,9 @@ cpumask_t cpu_initialized __cpuinitdata
struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -150,10 +152,16 @@ void __init setup_per_cpu_areas(void)
}
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ /* Relocate the pda */
+ memcpy(ptr, cpu_pda(i), sizeof(struct x8664_pda));
+ cpu_pda(i) = (struct x8664_pda *)ptr;
+ cpu_pda(i)->data_offset = ptr - __per_cpu_start;
}
+ /* Fix up pda for this processor .... */
+ pda_init(0);
+
/* setup percpu data maps early */
setup_per_cpu_maps();
}
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -566,22 +566,6 @@ static int __cpuinit do_boot_cpu(int cpu
return -1;
}
- /* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof (struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
- }
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -273,6 +273,7 @@
. = ALIGN(align); \
__per_cpu_start = .; \
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
} \
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -39,7 +39,6 @@ struct x8664_pda {
} ____cacheline_aligned_in_smp;
extern struct x8664_pda *_cpu_pda[];
-extern struct x8664_pda boot_cpu_pda[];
extern void pda_init(int);
#define cpu_pda(i) (_cpu_pda[i])
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -16,7 +16,14 @@
#define __my_cpu_offset read_pda(data_offset)
#define per_cpu_offset(x) (__per_cpu_offset(x))
+#define __percpu_seg "%%gs:"
+/* Calculate the offset to use with the segment register */
+#define seg_offset(name) (*SHIFT_PERCPU_PTR(&per_cpu_var(name), \
+ - (unsigned long)__per_cpu_start))
+#else
+#define __percpu_seg ""
+#define seg_offset(name) per_cpu_var(name)
#endif
#include <asm-generic/percpu.h>
@@ -64,16 +71,11 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
* PER_CPU(cpu_gdt_descr, %ebx)
*/
#ifdef CONFIG_SMP
-
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"
-
#else /* !SMP */
-
#define __percpu_seg ""
-
#endif /* SMP */
#include <asm-generic/percpu.h>
@@ -81,6 +83,13 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#define seg_offset(name) per_cpu_var(name)
+
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
@@ -132,11 +141,10 @@ extern void __bad_percpu_size(void);
} \
ret__; })
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#define x86_read_percpu(var) percpu_from_op("mov", seg_offset(var))
+#define x86_write_percpu(var,val) percpu_to_op("mov", seg_offset(var), val)
+#define x86_add_percpu(var,val) percpu_to_op("add", seg_offset(var), val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", seg_offset(var), val)
+#define x86_or_percpu(var,val) percpu_to_op("or", seg_offset(var), val)
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#endif /* _ASM_X86_PERCPU_H_ */
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,6 +9,10 @@
#include <asm/percpu.h>
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
#ifdef CONFIG_SMP
#define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".data.percpu"))) \
@@ -18,11 +22,18 @@
__attribute__((__section__(".data.percpu.shared_aligned"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ __attribute__((__section__(".data.percpu.first"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
#define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
DEFINE_PER_CPU(type, name)
#endif
--
WARNING: multiple messages have this Message-ID (diff)
From: travis@sgi.com
To: Andrew Morton <akpm@linux-foundation.org>,
Andi Kleen <ak@suse.de>,
mingo@elte.hu
Cc: Christoph Lameter <clameter@sgi.com>,
jeremy@goop.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 2/3] x86_64: Fold pda into per cpu area
Date: Tue, 22 Jan 2008 20:49:26 -0800 [thread overview]
Message-ID: <20080123044924.926408000@sgi.com> (raw)
In-Reply-To: 20080123044924.508382000@sgi.com
[-- Attachment #1: x86_64_fold_pda --]
[-- Type: text/plain, Size: 6856 bytes --]
%gs:[&per_cpu_xxxx - __per_cpu_start]
* The boot_pdas are only needed in head64.c so move the declaration
over there and make it static.
* Remove the code that allocates special pda data structures.
Based on 2.6.24-rc8-mm1
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
---
arch/x86/kernel/head64.c | 6 ++++++
arch/x86/kernel/setup64.c | 12 ++++++++++--
arch/x86/kernel/smpboot_64.c | 16 ----------------
include/asm-generic/vmlinux.lds.h | 1 +
include/asm-x86/pda.h | 1 -
include/asm-x86/percpu.h | 30 +++++++++++++++++++-----------
include/linux/percpu.h | 13 ++++++++++++-
7 files changed, 48 insertions(+), 31 deletions(-)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -22,6 +22,12 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
+/*
+ * Only used before the per cpu areas are set up. Non-possible cpus
+ * continue to use these entries after boot.
+ */
+static struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -34,7 +34,9 @@ cpumask_t cpu_initialized __cpuinitdata
struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -150,10 +152,16 @@ void __init setup_per_cpu_areas(void)
}
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ /* Relocate the pda */
+ memcpy(ptr, cpu_pda(i), sizeof(struct x8664_pda));
+ cpu_pda(i) = (struct x8664_pda *)ptr;
+ cpu_pda(i)->data_offset = ptr - __per_cpu_start;
}
+ /* Fix up pda for this processor .... */
+ pda_init(0);
+
/* setup percpu data maps early */
setup_per_cpu_maps();
}
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -566,22 +566,6 @@ static int __cpuinit do_boot_cpu(int cpu
return -1;
}
- /* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof (struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
- }
-
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -273,6 +273,7 @@
. = ALIGN(align); \
__per_cpu_start = .; \
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+ *(.data.percpu.first) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
} \
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -39,7 +39,6 @@ struct x8664_pda {
} ____cacheline_aligned_in_smp;
extern struct x8664_pda *_cpu_pda[];
-extern struct x8664_pda boot_cpu_pda[];
extern void pda_init(int);
#define cpu_pda(i) (_cpu_pda[i])
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -16,7 +16,14 @@
#define __my_cpu_offset read_pda(data_offset)
#define per_cpu_offset(x) (__per_cpu_offset(x))
+#define __percpu_seg "%%gs:"
+/* Calculate the offset to use with the segment register */
+#define seg_offset(name) (*SHIFT_PERCPU_PTR(&per_cpu_var(name), \
+ - (unsigned long)__per_cpu_start))
+#else
+#define __percpu_seg ""
+#define seg_offset(name) per_cpu_var(name)
#endif
#include <asm-generic/percpu.h>
@@ -64,16 +71,11 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
* PER_CPU(cpu_gdt_descr, %ebx)
*/
#ifdef CONFIG_SMP
-
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"
-
#else /* !SMP */
-
#define __percpu_seg ""
-
#endif /* SMP */
#include <asm-generic/percpu.h>
@@ -81,6 +83,13 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#define seg_offset(name) per_cpu_var(name)
+
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
@@ -132,11 +141,10 @@ extern void __bad_percpu_size(void);
} \
ret__; })
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#define x86_read_percpu(var) percpu_from_op("mov", seg_offset(var))
+#define x86_write_percpu(var,val) percpu_to_op("mov", seg_offset(var), val)
+#define x86_add_percpu(var,val) percpu_to_op("add", seg_offset(var), val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", seg_offset(var), val)
+#define x86_or_percpu(var,val) percpu_to_op("or", seg_offset(var), val)
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#endif /* _ASM_X86_PERCPU_H_ */
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,6 +9,10 @@
#include <asm/percpu.h>
+#ifndef PER_CPU_ATTRIBUTES
+#define PER_CPU_ATTRIBUTES
+#endif
+
#ifdef CONFIG_SMP
#define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".data.percpu"))) \
@@ -18,11 +22,18 @@
__attribute__((__section__(".data.percpu.shared_aligned"))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ __attribute__((__section__(".data.percpu.first"))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
#else
#define DEFINE_PER_CPU(type, name) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
DEFINE_PER_CPU(type, name)
#endif
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2008-01-23 4:50 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-23 4:49 [PATCH 0/3] percpu: Optimize percpu accesses travis
2008-01-23 4:49 ` travis
2008-01-23 4:49 ` [PATCH 1/3] generic: Percpu infrastructure to rebase the per cpu area to zero travis
2008-01-23 4:49 ` travis
2008-01-23 4:49 ` travis [this message]
2008-01-23 4:49 ` [PATCH 2/3] x86_64: Fold pda into per cpu area travis
2008-01-23 4:49 ` [PATCH 3/3] x86_64: Rebase per cpu variables to zero travis
2008-01-23 4:49 ` travis
2008-01-24 22:46 ` [PATCH 0/3] percpu: Optimize percpu accesses Ingo Molnar
2008-01-25 0:17 ` Mike Travis
2008-01-25 0:25 ` Ingo Molnar
2008-01-25 0:25 ` Ingo Molnar
2008-01-25 0:58 ` Mike Travis
2008-01-25 0:58 ` Mike Travis
2008-01-25 1:18 ` Jeremy Fitzhardinge
2008-01-25 1:18 ` Jeremy Fitzhardinge
2008-01-25 1:46 ` Jeremy Fitzhardinge
2008-01-25 1:46 ` Jeremy Fitzhardinge
2008-01-29 20:00 ` Mike Travis
2008-01-29 20:00 ` Mike Travis
2008-01-29 20:13 ` Christoph Lameter
2008-01-29 20:13 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080123044924.926408000@sgi.com \
--to=travis@sgi.com \
--cc=ak@suse.de \
--cc=akpm@linux-foundation.org \
--cc=clameter@sgi.com \
--cc=jeremy@goop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.