All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: ebiederm@xmission.com, cl@linux-foundation.org,
	rusty@rustcorp.com.au, mingo@elte.hu, travis@sgi.com,
	linux-kernel@vger.kernel.org, hpa@zytor.com,
	akpm@linux-foundation.org, steiner@sgi.com, hugh@veritas.com
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 05/13] x86_64: make percpu symbols zerobased on SMP
Date: Tue, 13 Jan 2009 19:38:09 +0900	[thread overview]
Message-ID: <1231843097-18003-6-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1231843097-18003-1-git-send-email-tj@kernel.org>

This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on
the percpu output section and using it in vmlinux_64.lds.S.  A new
PHDR is added as existing ones cannot contain sections near address
zero.  PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.

The following adjustments have been made to accommodate the address
change.

* code to locate percpu gdt_page in head_64.S is updated to add the
  load address to the gdt_page offset.

* __per_cpu_load is used in places where access to the init data area
  is necessary.

* pda->data_offset is initialized soon after C code is entered as zero
  value doesn't work anymore.

This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/head64.c          |    2 +
 arch/x86/kernel/head_64.S         |   24 ++++++++++++++++-
 arch/x86/kernel/setup_percpu.c    |    2 +-
 arch/x86/kernel/vmlinux_64.lds.S  |   17 +++++++++++-
 include/asm-generic/sections.h    |    2 +-
 include/asm-generic/vmlinux.lds.h |   51 ++++++++++++++++++++++++++++++++----
 6 files changed, 88 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a..a63261b 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -44,6 +44,8 @@ void __init x86_64_init_pda(void)
 {
 	_cpu_pda = __cpu_pda;
 	cpu_pda(0) = &_boot_cpu_pda;
+	cpu_pda(0)->data_offset =
+		(unsigned long)(__per_cpu_load - __per_cpu_start);
 	pda_init(0);
 }
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 26cfdc1..5ab77b3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -204,6 +204,23 @@ ENTRY(secondary_startup_64)
 	pushq $0
 	popfq
 
+#ifdef CONFIG_SMP
+	/*
+	 * early_gdt_descr_base should point to the gdt_page in the static
+	 * percpu init data area.  Computing this requires two symbols -
+	 * __per_cpu_load and per_cpu__gdt_page.  As the linker can't do such
+	 * a relocation, do it by hand.  As early_gdt_descr is manipulated by
+	 * C code for secondary CPUs, this should be done only once for the
+	 * boot CPU when early_gdt_descr_base contains zero.
+	 */
+	movq	early_gdt_descr_base(%rip), %rax
+	testq	%rax, %rax
+	jnz	1f
+	movq	$__per_cpu_load, %rax
+	addq	$per_cpu__gdt_page, %rax
+	movq	%rax, early_gdt_descr_base(%rip)
+1:
+#endif
 	/*
 	 * We must switch to a new descriptor in kernel space for the GDT
 	 * because soon the kernel won't have access anymore to the userspace
@@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt)
 	.globl early_gdt_descr
 early_gdt_descr:
 	.word	GDT_ENTRIES*8-1
-	.quad   per_cpu__gdt_page
+#ifdef CONFIG_SMP
+early_gdt_descr_base:
+	.quad   0x0000000000000000
+#else
+	.quad	per_cpu__gdt_page
+#endif
 
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ee55e3b..8a22c94 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -194,7 +194,7 @@ void __init setup_per_cpu_areas(void)
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0..f50280d 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,6 +19,9 @@ PHDRS {
 	data PT_LOAD FLAGS(7);	/* RWE */
 	user PT_LOAD FLAGS(7);	/* RWE */
 	data.init PT_LOAD FLAGS(7);	/* RWE */
+#ifdef CONFIG_SMP
+	percpu PT_LOAD FLAGS(7);	/* RWE */
+#endif
 	note PT_NOTE FLAGS(0);	/* ___ */
 }
 SECTIONS
@@ -208,14 +211,26 @@ SECTIONS
   __initramfs_end = .;
 #endif
 
+#ifdef CONFIG_SMP
+  /*
+   * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
+   * output PHDR, so the next output section - __data_nosave - should
+   * switch it back to data.init.
+   */
+  . = ALIGN(PAGE_SIZE);
+  PERCPU_VADDR(0, :percpu)
+#else
   PERCPU(PAGE_SIZE)
+#endif
 
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
 
   . = ALIGN(PAGE_SIZE);
   __nosave_begin = .;
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
+  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+      *(.data.nosave)
+  } :data.init	/* switch back to data.init, see PERCPU_VADDR() above */
   . = ALIGN(PAGE_SIZE);
   __nosave_end = .;
 
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 79a7ff9..4ce48e8 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
 extern char __init_begin[], __init_end[];
 extern char _sinittext[], _einittext[];
 extern char _end[];
-extern char __per_cpu_start[], __per_cpu_end[];
+extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
 extern char __kprobes_text_start[], __kprobes_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c61fab1..fc2f55f 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,12 +430,51 @@
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
 
-#define PERCPU(align)							\
-	. = ALIGN(align);						\
-	VMLINUX_SYMBOL(__per_cpu_start) = .;				\
-	.data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) {		\
+#define PERCPU_PROLOG(vaddr)						\
+	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
+	.data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__per_cpu_start) = .;
+
+#define PERCPU_EPILOG(phdr)						\
+		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
+	} phdr								\
+	. = __per_cpu_load + SIZEOF(.data.percpu);
+
+/**
+ * PERCPU_VADDR - define output section for percpu area
+ * @vaddr: explicit base address (optional)
+ * @phdr: destination PHDR (optional)
+ *
+ * Macro which expands to the output section for the percpu area.  If
+ * @vaddr is not blank, it specifies an explicit base address and all
+ * percpu symbols will be offset from the given address.  If blank, the
+ * section's vaddr equals its load address + LOAD_OFFSET.
+ *
+ * @phdr defines the output PHDR to use if not blank.  Be warned that
+ * output PHDR is sticky.  If @phdr is specified, the next output
+ * section in the linker script will go there too.  @phdr should have
+ * a leading colon.
+ *
+ * This macro defines three symbols, __per_cpu_load, __per_cpu_start
+ * and __per_cpu_end.  The first one is the vaddr of loaded percpu
+ * init data.  __per_cpu_start equals @vaddr and __per_cpu_end is the
+ * end offset.
+ */
+#define PERCPU_VADDR(vaddr, phdr)					\
+	PERCPU_PROLOG(vaddr)						\
 		*(.data.percpu.page_aligned)				\
 		*(.data.percpu)						\
 		*(.data.percpu.shared_aligned)				\
-	}								\
-	VMLINUX_SYMBOL(__per_cpu_end) = .;
+	PERCPU_EPILOG(phdr)
+
+/**
+ * PERCPU - define output section for percpu area, simple version
+ * @align: required alignment
+ *
+ * Align to @align and outputs output section for percpu area.  This
+ * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
+ * __per_cpu_start will be identical.
+ */
+#define PERCPU(align)							\
+	. = ALIGN(align);						\
+	PERCPU_VADDR( , )
-- 
1.5.6


  parent reply	other threads:[~2009-01-13 10:41 UTC|newest]

Thread overview: 113+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-13 10:38 [PATCHSET linux-2.6-x86:tip] x86: make percpu offsets zero-based on SMP Tejun Heo
2009-01-13 10:38 ` [PATCH 01/13] x86_64: fix pda_to_op() Tejun Heo
2009-01-13 10:38 ` [PATCH 02/13] x86: make early_per_cpu() a lvalue and use it Tejun Heo
2009-01-13 11:43   ` [PATCH 02/13 UPDATED] " Tejun Heo
2009-01-13 10:38 ` [PATCH 03/13] x86_64: Cleanup early setup_percpu references Tejun Heo
2009-01-13 10:38 ` [PATCH 04/13] x86_32: make vmlinux_32.lds.S use PERCPU() macro Tejun Heo
2009-01-13 10:38 ` Tejun Heo [this message]
2009-01-13 10:38 ` [PATCH 06/13] x86_64: load pointer to pda into %gs while brining up a CPU Tejun Heo
2009-01-13 10:38 ` [PATCH 07/13] x86_64: use static _cpu_pda array Tejun Heo
2009-01-13 10:38 ` [PATCH 08/13] x86_64: fold pda into percpu area on SMP Tejun Heo
2009-01-13 10:38 ` [PATCH 09/13] x86_64: merge 64 and 32 SMP percpu handling Tejun Heo
2009-01-13 10:38 ` [PATCH 10/13] x86_64: make pda a percpu variable Tejun Heo
2009-01-13 10:38 ` [PATCH 11/13] x86_64: convert pda ops to wrappers around x86 percpu accessors Tejun Heo
2009-01-13 10:38 ` [PATCH 12/13] x86_64: misc clean up after the percpu update Tejun Heo
2009-01-13 10:38 ` [PATCH 13/13] x86_32: make percpu symbols zerobased on SMP Tejun Heo
2009-01-14  0:18   ` Rusty Russell
2009-01-14  2:03     ` Tejun Heo
2009-01-13 10:48 ` [PATCHSET linux-2.6-x86:tip] x86: make percpu offsets zero-based " Tejun Heo
2009-01-13 13:27   ` Brian Gerst
2009-01-13 14:05     ` Tejun Heo
2009-01-13 14:26       ` Brian Gerst
2009-01-13 14:37         ` Tejun Heo
2009-01-14  6:57       ` H. Peter Anvin
2009-01-14  9:38         ` [patch] add optimized generic percpu accessors Ingo Molnar
2009-01-14  9:45           ` Ingo Molnar
2009-01-15 10:04           ` roel kluin
2009-01-15 10:26             ` Tejun Heo
2009-01-15 11:32               ` Ingo Molnar
2009-01-15 11:36                 ` Tejun Heo
2009-01-15 12:22                   ` Ingo Molnar
2009-01-15 13:09                     ` Tejun Heo
2009-01-15 13:32                       ` Ingo Molnar
2009-01-15 13:33                         ` Ingo Molnar
2009-01-15 13:39                         ` Ingo Molnar
2009-01-15 21:54                           ` Tejun Heo
2009-01-16  1:28                             ` [PATCH x86/percpu] x86: fix build bug introduced during merge Tejun Heo
2009-01-16  3:25                               ` [PATCH x86/percpu] x86_64: initialize this_cpu_off to __per_cpu_load Tejun Heo
2009-01-16 13:16                                 ` Ingo Molnar
2009-01-16 13:47                                   ` Tejun Heo
     [not found]                                     ` <20090116221651.GA32736@elte.hu>
     [not found]                                       ` <20090116223828.GA9294@elte.hu>
     [not found]                                         ` <49716F0E.7060605@gmail.com>
     [not found]                                           ` <49716FBF.7080203@kernel.org>
2009-01-17  6:29                                             ` [PATCH core/percpu REPOST] linker script: add missing VMLINUX_SYMBOL Tejun Heo
2009-01-17  6:32                                               ` [PATCH] linker script: add missing .data.percpu.page_aligned Tejun Heo
2009-01-17  8:21                                                 ` Ingo Molnar
2009-01-21 23:46                                                 ` Christoph Lameter
2009-01-21 23:45                                               ` [PATCH core/percpu REPOST] linker script: add missing VMLINUX_SYMBOL Christoph Lameter
2009-01-15 22:34                           ` [patch] add optimized generic percpu accessors Ingo Molnar
2009-01-15 20:31                     ` Christoph Lameter
2009-01-16  9:41                       ` Tejun Heo
2009-01-16 13:23                         ` Ingo Molnar
2009-01-16 13:49                           ` Tejun Heo
2009-01-15 10:26           ` Tejun Heo
2009-01-15 11:30             ` Ingo Molnar
2009-01-15 11:38               ` Tejun Heo
2009-01-15 12:26                 ` Ingo Molnar
2009-01-15 13:04                   ` Tejun Heo
2009-01-15 13:07                     ` Ingo Molnar
2009-01-15 13:23                       ` [PATCH] percpu: " Tejun Heo
2009-01-15 13:36                         ` Ingo Molnar
2009-01-15 17:30                         ` Andrew Morton
2009-01-15 18:02                           ` Ingo Molnar
2009-01-15 18:34                             ` Andrew Morton
2009-01-15 18:39                               ` Ingo Molnar
2009-01-15 21:53                                 ` Tejun Heo
2009-01-16  0:12                                 ` Herbert Xu
2009-01-16  0:15                                   ` Ingo Molnar
2009-01-16  0:18                                     ` Herbert Xu
     [not found]                                       ` <200901170827.33729.rusty@rustcorp.com.au>
2009-01-16 22:08                                         ` Ingo Molnar
     [not found]                                           ` <200901201328.24605.rusty@rustcorp.com.au>
2009-01-20  6:25                                             ` Tejun Heo
2009-01-20 10:36                                               ` Ingo Molnar
     [not found]                                               ` <200901271213.18605.rusty@rustcorp.com.au>
2009-01-27  2:24                                                 ` Tejun Heo
2009-01-27 13:13                                                   ` Ingo Molnar
2009-01-27 23:07                                                     ` Tejun Heo
2009-01-28  3:36                                                       ` Tejun Heo
2009-01-28  8:12                                                         ` Tejun Heo
2009-01-27 20:08                                                   ` Christoph Lameter
2009-01-27 21:47                                                     ` David Miller
2009-01-27 22:47                                                       ` Rick Jones
2009-01-27 22:47                                                         ` Rick Jones
2009-01-28  0:17                                                         ` Luck, Tony
2009-01-28  0:17                                                           ` Luck, Tony
2009-01-28 16:48                                                           ` Christoph Lameter
2009-01-28 16:48                                                             ` Christoph Lameter
2009-01-28 17:15                                                             ` Luck, Tony
2009-01-28 17:15                                                               ` Luck, Tony
2009-01-28 16:45                                                       ` Christoph Lameter
2009-01-28 20:47                                                         ` David Miller
2009-01-28 10:38                                                   ` Rusty Russell
2009-01-28 10:56                                                     ` Tejun Heo
2009-01-29  2:06                                                       ` Rusty Russell
2009-01-31  6:11                                                         ` Tejun Heo
2009-01-28 16:50                                                     ` Christoph Lameter
2009-01-28 18:07                                                       ` Mathieu Desnoyers
2009-01-29 18:33                                                         ` Christoph Lameter
2009-01-29 18:48                                                           ` H. Peter Anvin
2009-01-20 10:40                                             ` Ingo Molnar
2009-01-21  5:52                                               ` Tejun Heo
2009-01-21 10:05                                                 ` Ingo Molnar
2009-01-21 11:21                                                 ` Eric W. Biederman
2009-01-21 12:45                                                   ` Stephen Hemminger
2009-01-21 14:13                                                     ` Eric W. Biederman
2009-01-21 20:34                                                     ` David Miller
2009-01-16  1:09                                     ` H. Peter Anvin
     [not found]                                   ` <200901170804.18622.rusty@rustcorp.com.au>
2009-01-16 21:59                                     ` Ingo Molnar
2009-01-16 22:09                                     ` Ingo Molnar
2009-01-16 14:10                               ` Mark Lord
2009-01-15 18:46                         ` Ingo Molnar
2009-01-31 10:36                         ` Jeremy Fitzhardinge
2009-01-15 13:59                     ` [patch] " roel kluin
2009-01-15 21:51                       ` Tejun Heo
     [not found]                 ` <200901170748.53734.rusty@rustcorp.com.au>
2009-01-16 21:24                   ` Ingo Molnar
     [not found]           ` <200901151253.44016.rusty@rustcorp.com.au>
2009-01-15  9:55             ` Ingo Molnar
2009-01-15 10:27             ` Tejun Heo
2009-01-31 10:30           ` Jeremy Fitzhardinge
2009-01-31 16:00             ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1231843097-18003-6-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=hpa@zytor.com \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rusty@rustcorp.com.au \
    --cc=steiner@sgi.com \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.