* [patch] x86_64: align and pad x86_64 GDT on page boundary
@ 2005-12-08 19:42 Ravikiran G Thirumalai
2005-12-08 20:15 ` [discuss] " Andi Kleen
0 siblings, 1 reply; 18+ messages in thread
From: Ravikiran G Thirumalai @ 2005-12-08 19:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: Andi Kleen, linux-kernel, discuss, zach, shai, nippung
This patch is on the same lines as Zachary Amsden's i386 GDT page alignemnt
patch in -mm, but for x86_64.
Thanks,
Kiran
Patch to align and pad x86_64 GDT on page boundries.
Signed-off-by: Nippun Goel <nippung@calsoftinc.com>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Index: linux-2.6.15-rc3/arch/x86_64/kernel/head.S
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/head.S 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/head.S 2005-12-01 16:19:29.000000000 -0800
@@ -379,7 +379,7 @@
* Also sysret mandates a special GDT layout
*/
-.align L1_CACHE_BYTES
+.align PAGE_SIZE
/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
@@ -401,10 +401,11 @@
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
- /* GDTs of other CPUs: */
- .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
+ /* GDTs of other CPUs are now dynamically allocated */
- .align L1_CACHE_BYTES
+ /* zero the remaining page */
+ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+
ENTRY(idt_table)
.rept 256
.quad 0
Index: linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:28.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:29.000000000 -0800
@@ -232,15 +232,15 @@
* and set up the GDT descriptor:
*/
if (cpu) {
- memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
+ memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
}
cpu_gdt_descr[cpu].size = GDT_SIZE;
- cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
+
asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
asm volatile("lidt %0" :: "m" (idt_descr));
- memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
+ memcpy(me->thread.tls_array, get_cpu_gdt_table(cpu), GDT_ENTRY_TLS_ENTRIES * 8);
/*
* Delete NT
Index: linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:28.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:29.000000000 -0800
@@ -743,6 +743,13 @@
};
DECLARE_WORK(work, do_fork_idle, &c_idle);
+ /* allocate memory for gdts of secondary cpus. Hotplug is considered */
+ if (!cpu_gdt_descr[cpu].address &&
+ !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
+ printk("Failed to allocate GDT for CPU %d\n", cpu);
+ return 1;
+ }
+
c_idle.idle = get_idle_for_cpu(cpu);
if (c_idle.idle) {
Index: linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/suspend.c 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c 2005-12-01 16:19:29.000000000 -0800
@@ -120,7 +120,7 @@
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
- cpu_gdt_table[cpu][GDT_ENTRY_TSS].type = 9;
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
syscall_init(); /* This sets MSR_*STAR and related */
load_TR_desc(); /* This does ltr */
Index: linux-2.6.15-rc3/include/asm-x86_64/desc.h
===================================================================
--- linux-2.6.15-rc3.orig/include/asm-x86_64/desc.h 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/include/asm-x86_64/desc.h 2005-12-01 16:19:29.000000000 -0800
@@ -25,7 +25,7 @@
unsigned int a,b;
};
-extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
enum {
GATE_INTERRUPT = 0xE,
@@ -79,6 +79,9 @@
extern struct gate_struct idt_table[];
extern struct desc_ptr cpu_gdt_descr[];
+/* the cpu gdt accessor */
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
+
static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
{
struct gate_struct s;
@@ -139,20 +142,20 @@
* -1? seg base+limit should be pointing to the address of the
* last valid byte
*/
- set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS],
+ set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
(unsigned long)addr, DESC_TSS,
IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
}
static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
{
- set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr,
+ set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (unsigned long)addr,
DESC_LDT, size * 8 - 1);
}
static inline void set_seg_base(unsigned cpu, int entry, void *base)
{
- struct desc_struct *d = &cpu_gdt_table[cpu][entry];
+ struct desc_struct *d = &get_cpu_gdt_table(cpu)[entry];
u32 addr = (u32)(u64)base;
BUG_ON((u64)base >> 32);
d->base0 = addr & 0xffff;
@@ -194,7 +197,7 @@
static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- u64 *gdt = (u64 *)(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN);
+ u64 *gdt = (u64 *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
gdt[0] = t->tls_array[0];
gdt[1] = t->tls_array[1];
gdt[2] = t->tls_array[2];
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 19:42 [patch] x86_64: align and pad x86_64 GDT on page boundary Ravikiran G Thirumalai @ 2005-12-08 20:15 ` Andi Kleen 2005-12-08 21:55 ` Ravikiran G Thirumalai 0 siblings, 1 reply; 18+ messages in thread From: Andi Kleen @ 2005-12-08 20:15 UTC (permalink / raw) To: Ravikiran G Thirumalai Cc: Andrew Morton, Andi Kleen, linux-kernel, discuss, zach, shai, nippung On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai wrote: > > - .align L1_CACHE_BYTES > + /* zero the remaining page */ > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 > + > ENTRY(idt_table) Why can't the IDT not be shared with the GDT page? It should be mostly read only right and putting r-o data on that page should be ok, right? > @@ -743,6 +743,13 @@ > }; > DECLARE_WORK(work, do_fork_idle, &c_idle); > > + /* allocate memory for gdts of secondary cpus. Hotplug is considered */ > + if (!cpu_gdt_descr[cpu].address && > + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { > + printk("Failed to allocate GDT for CPU %d\n", cpu); > + return 1; Is this return really correctly handled? Doubtful. -Andi ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 20:15 ` [discuss] " Andi Kleen @ 2005-12-08 21:55 ` Ravikiran G Thirumalai 2005-12-08 23:09 ` Rohit Seth 0 siblings, 1 reply; 18+ messages in thread From: Ravikiran G Thirumalai @ 2005-12-08 21:55 UTC (permalink / raw) To: Andi Kleen; +Cc: Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Thu, Dec 08, 2005 at 09:15:18PM +0100, Andi Kleen wrote: > On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai wrote: > > > > - .align L1_CACHE_BYTES > > + /* zero the remaining page */ > > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 > > + > > ENTRY(idt_table) > > Why can't the IDT not be shared with the GDT page? It should be mostly > read only right and putting r-o data on that page should be ok, right? Yes, you are right. This should not have been a problem. Some people reported this symbol (cpu_gdt) though. Will have to go back and check. Thanks, Kiran ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 21:55 ` Ravikiran G Thirumalai @ 2005-12-08 23:09 ` Rohit Seth 2005-12-08 23:11 ` Andi Kleen 0 siblings, 1 reply; 18+ messages in thread From: Rohit Seth @ 2005-12-08 23:09 UTC (permalink / raw) To: Ravikiran G Thirumalai Cc: Andi Kleen, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Thu, 2005-12-08 at 13:55 -0800, Ravikiran G Thirumalai wrote: > On Thu, Dec 08, 2005 at 09:15:18PM +0100, Andi Kleen wrote: > > On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai > wrote: > > > > > > - .align L1_CACHE_BYTES > > > + /* zero the remaining page */ > > > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 > > > + > > > ENTRY(idt_table) > > > > Why can't the IDT not be shared with the GDT page? It should be > mostly > > read only right and putting r-o data on that page should be ok, > right? > > Yes, you are right. This should not have been a problem. > Some people reported this symbol (cpu_gdt) though. Will have to go > back and > check. IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data structure to each secondary CPU dynamically allocating page for its gdt. This was mainly to address the excessive cost in virtualized environments of sharing the gdt page with other RW items in per_cpu_data structure. I think the padding was because xen does not like sharing gdt page with anything else... I agree that if no other constraint is there then IDT could be moved into the same page. -rohit ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:09 ` Rohit Seth @ 2005-12-08 23:11 ` Andi Kleen 2005-12-08 23:26 ` Rohit Seth 0 siblings, 1 reply; 18+ messages in thread From: Andi Kleen @ 2005-12-08 23:11 UTC (permalink / raw) To: Rohit Seth Cc: Ravikiran G Thirumalai, Andi Kleen, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Thu, Dec 08, 2005 at 03:09:17PM -0800, Rohit Seth wrote: > On Thu, 2005-12-08 at 13:55 -0800, Ravikiran G Thirumalai wrote: > > On Thu, Dec 08, 2005 at 09:15:18PM +0100, Andi Kleen wrote: > > > On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai > > wrote: > > > > > > > > - .align L1_CACHE_BYTES > > > > + /* zero the remaining page */ > > > > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 > > > > + > > > > ENTRY(idt_table) > > > > > > Why can't the IDT not be shared with the GDT page? It should be > > mostly > > > read only right and putting r-o data on that page should be ok, > > right? > > > > Yes, you are right. This should not have been a problem. > > Some people reported this symbol (cpu_gdt) though. Will have to go > > back and > > check. > > IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data > structure to each secondary CPU dynamically allocating page for its gdt. Kiran's patch does this too. Except for the BP GDT, which could be shared with the single IDT. -Andi (who actually plans to attack per CPU IDTs at some point so this could change later) ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:11 ` Andi Kleen @ 2005-12-08 23:26 ` Rohit Seth 2005-12-08 23:26 ` Andi Kleen 0 siblings, 1 reply; 18+ messages in thread From: Rohit Seth @ 2005-12-08 23:26 UTC (permalink / raw) To: Andi Kleen Cc: Ravikiran G Thirumalai, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, 2005-12-09 at 00:11 +0100, Andi Kleen wrote: > On Thu, Dec 08, 2005 at 03:09:17PM -0800, Rohit Seth wrote: > > > > IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data > > structure to each secondary CPU dynamically allocating page for its gdt. > > Kiran's patch does this too. Except for the BP GDT, which could > be shared with the single IDT. > ...that padding in BP's GDT (in this and original patches) could be because of the Xen requirements to have dedicated pages for gdt. -rohit ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:26 ` Rohit Seth @ 2005-12-08 23:26 ` Andi Kleen 2005-12-08 23:45 ` Rohit Seth 0 siblings, 1 reply; 18+ messages in thread From: Andi Kleen @ 2005-12-08 23:26 UTC (permalink / raw) To: Rohit Seth Cc: Andi Kleen, Ravikiran G Thirumalai, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Thu, Dec 08, 2005 at 03:26:07PM -0800, Rohit Seth wrote: > On Fri, 2005-12-09 at 00:11 +0100, Andi Kleen wrote: > > On Thu, Dec 08, 2005 at 03:09:17PM -0800, Rohit Seth wrote: > > > > > > IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data > > > structure to each secondary CPU dynamically allocating page for its gdt. > > > > Kiran's patch does this too. Except for the BP GDT, which could > > be shared with the single IDT. > > > > ...that padding in BP's GDT (in this and original patches) could be > because of the Xen requirements to have dedicated pages for gdt. Well if the Xen people have such requirements they can submit separate patches. Currently they don't seem to be interested at all in submitting patches to mainline, so we must work with the VM hackers who are interested in this (scalex86, VMware) And AFAIK they only care about not having false sharing in there. -Andi ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:26 ` Andi Kleen @ 2005-12-08 23:45 ` Rohit Seth 2005-12-08 23:43 ` Andi Kleen 2005-12-08 23:50 ` Rohit Seth 0 siblings, 2 replies; 18+ messages in thread From: Rohit Seth @ 2005-12-08 23:45 UTC (permalink / raw) To: Andi Kleen Cc: Ravikiran G Thirumalai, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, 2005-12-09 at 00:26 +0100, Andi Kleen wrote: > Well if the Xen people have such requirements they can submit > separate patches. Currently they don't seem to be interested > at all in submitting patches to mainline, so we must work > with the VM hackers who are interested in this (scalex86, VMware) > And AFAIK they only care about not having false sharing in there. > Agreed. Though do we need to have full page allocated for each gdt (256 bytes) then? ...possibly use kmalloc. -rohit ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:45 ` Rohit Seth @ 2005-12-08 23:43 ` Andi Kleen 2005-12-09 22:19 ` Ravikiran G Thirumalai 2005-12-08 23:50 ` Rohit Seth 1 sibling, 1 reply; 18+ messages in thread From: Andi Kleen @ 2005-12-08 23:43 UTC (permalink / raw) To: Rohit Seth Cc: Andi Kleen, Ravikiran G Thirumalai, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Thu, Dec 08, 2005 at 03:45:11PM -0800, Rohit Seth wrote: > On Fri, 2005-12-09 at 00:26 +0100, Andi Kleen wrote: > > > Well if the Xen people have such requirements they can submit > > separate patches. Currently they don't seem to be interested > > at all in submitting patches to mainline, so we must work > > with the VM hackers who are interested in this (scalex86, VMware) > > And AFAIK they only care about not having false sharing in there. > > > > > Agreed. > > Though do we need to have full page allocated for each gdt (256 bytes) > then? ...possibly use kmalloc. For scalex I think it needs to be page aligned because that is what the effective cacheline size for remote nodes is in their setup. That would be difficult for kmalloc because it cannot guarantee that alignment nor avoid false sharing. For the BP case it's ok as long as the beginning is correctly aligned and the rest is read-only. -Andi ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:43 ` Andi Kleen @ 2005-12-09 22:19 ` Ravikiran G Thirumalai 2005-12-09 22:57 ` Andi Kleen ` (2 more replies) 0 siblings, 3 replies; 18+ messages in thread From: Ravikiran G Thirumalai @ 2005-12-09 22:19 UTC (permalink / raw) To: Andi Kleen Cc: Rohit Seth, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, Dec 09, 2005 at 12:43:20AM +0100, Andi Kleen wrote: > On Thu, Dec 08, 2005 at 03:45:11PM -0800, Rohit Seth wrote: > > On Fri, 2005-12-09 at 00:26 +0100, Andi Kleen wrote: > > > > > Well if the Xen people have such requirements they can submit > > > separate patches. Currently they don't seem to be interested > > > at all in submitting patches to mainline, so we must work > > > with the VM hackers who are interested in this (scalex86, VMware) > > > And AFAIK they only care about not having false sharing in there. > > > > > > > > > Agreed. > > > > Though do we need to have full page allocated for each gdt (256 bytes) > > then? ...possibly use kmalloc. > > For scalex I think it needs to be page aligned because that is what > the effective cacheline size for remote nodes is in their setup. > That would be difficult for kmalloc because it cannot guarantee that > alignment nor avoid false sharing. Exactly. > For the BP case it's ok as > long as the beginning is correctly aligned and the rest > is read-only. Just that any writes on the bp GDT will invalidate the idt_table cacheline, which is read mostly (as Nippun pointed out). So could we keep the padding as it is for the BP too? Attached is the patch which fixes the retval in do_boot_cpu to return -1. __cpu_up handles -1. Hope this patch is OK. Thanks, Kiran --- This patch is on the same lines as Zachary Amsden's i386 GDT page alignemnt patch in -mm, but for x86_64. Patch to align and pad x86_64 GDT on page boundries. Signed-off-by: Nippun Goel <nippung@calsoftinc.com> Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org> Signed-off-by: Shai Fultheim <shai@scalex86.org> Index: linux-2.6.15-rc3/arch/x86_64/kernel/head.S =================================================================== --- linux-2.6.15-rc3.orig/arch/x86_64/kernel/head.S 2005-12-01 15:49:27.000000000 -0800 +++ linux-2.6.15-rc3/arch/x86_64/kernel/head.S 2005-12-01 16:19:29.000000000 -0800 @@ -379,7 +379,7 @@ * Also sysret mandates a special GDT layout */ -.align L1_CACHE_BYTES +.align PAGE_SIZE /* The TLS descriptors are currently at a different place compared to i386. Hopefully nobody expects them at a fixed place (Wine?) */ @@ -401,10 +401,11 @@ gdt_end: /* asm/segment.h:GDT_ENTRIES must match this */ /* This should be a multiple of the cache line size */ - /* GDTs of other CPUs: */ - .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table) + /* GDTs of other CPUs are now dynamically allocated */ - .align L1_CACHE_BYTES + /* zero the remaining page */ + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 + ENTRY(idt_table) .rept 256 .quad 0 Index: linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c =================================================================== --- linux-2.6.15-rc3.orig/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:28.000000000 -0800 +++ linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:29.000000000 -0800 @@ -232,15 +232,15 @@ * and set up the GDT descriptor: */ if (cpu) { - memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); + memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE); } cpu_gdt_descr[cpu].size = GDT_SIZE; - cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; + asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); asm volatile("lidt %0" :: "m" (idt_descr)); - memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8); + memcpy(me->thread.tls_array, get_cpu_gdt_table(cpu), GDT_ENTRY_TLS_ENTRIES * 8); /* * Delete NT Index: linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c =================================================================== --- linux-2.6.15-rc3.orig/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:28.000000000 -0800 +++ linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:29.000000000 -0800 @@ -743,6 +743,13 @@ }; DECLARE_WORK(work, do_fork_idle, &c_idle); + /* allocate memory for gdts of secondary cpus. Hotplug is considered */ + if (!cpu_gdt_descr[cpu].address && + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { + printk("Failed to allocate GDT for CPU %d\n", cpu); + return -1; + } + c_idle.idle = get_idle_for_cpu(cpu); if (c_idle.idle) { Index: linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c =================================================================== --- linux-2.6.15-rc3.orig/arch/x86_64/kernel/suspend.c 2005-12-01 15:49:27.000000000 -0800 +++ linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c 2005-12-01 16:19:29.000000000 -0800 @@ -120,7 +120,7 @@ set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ - cpu_gdt_table[cpu][GDT_ENTRY_TSS].type = 9; + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; syscall_init(); /* This sets MSR_*STAR and related */ load_TR_desc(); /* This does ltr */ Index: linux-2.6.15-rc3/include/asm-x86_64/desc.h =================================================================== --- linux-2.6.15-rc3.orig/include/asm-x86_64/desc.h 2005-12-01 15:49:27.000000000 -0800 +++ linux-2.6.15-rc3/include/asm-x86_64/desc.h 2005-12-01 16:19:29.000000000 -0800 @@ -25,7 +25,7 @@ unsigned int a,b; }; -extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; +extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; enum { GATE_INTERRUPT = 0xE, @@ -79,6 +79,9 @@ extern struct gate_struct idt_table[]; extern struct desc_ptr cpu_gdt_descr[]; +/* the cpu gdt accessor */ +#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address) + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -139,20 +142,20 @@ * -1? seg base+limit should be pointing to the address of the * last valid byte */ - set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], + set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS], (unsigned long)addr, DESC_TSS, IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); } static inline void set_ldt_desc(unsigned cpu, void *addr, int size) { - set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr, + set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (unsigned long)addr, DESC_LDT, size * 8 - 1); } static inline void set_seg_base(unsigned cpu, int entry, void *base) { - struct desc_struct *d = &cpu_gdt_table[cpu][entry]; + struct desc_struct *d = &get_cpu_gdt_table(cpu)[entry]; u32 addr = (u32)(u64)base; BUG_ON((u64)base >> 32); d->base0 = addr & 0xffff; @@ -194,7 +197,7 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { - u64 *gdt = (u64 *)(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN); + u64 *gdt = (u64 *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN); gdt[0] = t->tls_array[0]; gdt[1] = t->tls_array[1]; gdt[2] = t->tls_array[2]; ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-09 22:19 ` Ravikiran G Thirumalai @ 2005-12-09 22:57 ` Andi Kleen 2005-12-09 23:01 ` Rohit Seth 2005-12-12 2:34 ` Zwane Mwaikambo 2 siblings, 0 replies; 18+ messages in thread From: Andi Kleen @ 2005-12-09 22:57 UTC (permalink / raw) To: Ravikiran G Thirumalai Cc: Andi Kleen, Rohit Seth, Andrew Morton, linux-kernel, discuss, zach, shai, nippung > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > which is read mostly (as Nippun pointed out). So could we keep the padding > as it is for the BP too? Yes, good point. > Attached is the patch which fixes the retval in do_boot_cpu to return -1. > __cpu_up handles -1. > > Hope this patch is OK. Looks good. Thanks, -Andi ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-09 22:19 ` Ravikiran G Thirumalai 2005-12-09 22:57 ` Andi Kleen @ 2005-12-09 23:01 ` Rohit Seth 2005-12-09 22:59 ` Andi Kleen 2005-12-12 2:34 ` Zwane Mwaikambo 2 siblings, 1 reply; 18+ messages in thread From: Rohit Seth @ 2005-12-09 23:01 UTC (permalink / raw) To: Ravikiran G Thirumalai Cc: Andi Kleen, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, 2005-12-09 at 14:19 -0800, Ravikiran G Thirumalai wrote: > On Fri, Dec 09, 2005 at 12:43:20AM +0100, Andi Kleen wrote: > > > > For scalex I think it needs to be page aligned because that is what > > the effective cacheline size for remote nodes is in their setup. > > That would be difficult for kmalloc because it cannot guarantee that > > alignment nor avoid false sharing. > > Exactly. > Are you saying remote node will cache a whole page for every byte access on that page. > > For the BP case it's ok as > > long as the beginning is correctly aligned and the rest > > is read-only. > > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > which is read mostly (as Nippun pointed out). So could we keep the padding > as it is for the BP too? > Do you write into GDT often for this to be an issue. The reason I'm asking this because the per-cpu IDTs that Andi refered in the future. If we are really not using too many bytes in GDT then rest of the page can be used for IDT and such mostly RO data. -rohit ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-09 23:01 ` Rohit Seth @ 2005-12-09 22:59 ` Andi Kleen 2005-12-09 23:46 ` Ravikiran G Thirumalai 0 siblings, 1 reply; 18+ messages in thread From: Andi Kleen @ 2005-12-09 22:59 UTC (permalink / raw) To: Rohit Seth Cc: Ravikiran G Thirumalai, Andi Kleen, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, Dec 09, 2005 at 03:01:27PM -0800, Rohit Seth wrote: > > > For the BP case it's ok as > > > long as the beginning is correctly aligned and the rest > > > is read-only. > > > > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > > which is read mostly (as Nippun pointed out). So could we keep the padding > > as it is for the BP too? > > > > Do you write into GDT often for this to be an issue. The reason I'm The context switch writes into the GDT to switch around the TLS segments when they are <4GB. Or in pre NPTL the same would be done for the LDT also used for TLS. > asking this because the per-cpu IDTs that Andi refered in the future. > If we are really not using too many bytes in GDT then rest of the page > can be used for IDT and such mostly RO data. Once I implement that it can be shared with that page. -Andi ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-09 22:59 ` Andi Kleen @ 2005-12-09 23:46 ` Ravikiran G Thirumalai 0 siblings, 0 replies; 18+ messages in thread From: Ravikiran G Thirumalai @ 2005-12-09 23:46 UTC (permalink / raw) To: Andi Kleen Cc: Rohit Seth, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, Dec 09, 2005 at 11:59:21PM +0100, Andi Kleen wrote: > On Fri, Dec 09, 2005 at 03:01:27PM -0800, Rohit Seth wrote: > > > > For the BP case it's ok as > > > > long as the beginning is correctly aligned and the rest > > > > is read-only. > > > > > > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > > > which is read mostly (as Nippun pointed out). So could we keep the padding > > > as it is for the BP too? > > > > > > > Do you write into GDT often for this to be an issue. The reason I'm > > The context switch writes into the GDT to switch around the TLS segments Yup. > > > asking this because the per-cpu IDTs that Andi refered in the future. > > If we are really not using too many bytes in GDT then rest of the page > > can be used for IDT and such mostly RO data. > > Once I implement that it can be shared with that page. Yes, that will be nice. Thanks, Kiran ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-09 22:19 ` Ravikiran G Thirumalai 2005-12-09 22:57 ` Andi Kleen 2005-12-09 23:01 ` Rohit Seth @ 2005-12-12 2:34 ` Zwane Mwaikambo 2005-12-12 2:31 ` Andi Kleen 2005-12-12 2:38 ` Zwane Mwaikambo 2 siblings, 2 replies; 18+ messages in thread From: Zwane Mwaikambo @ 2005-12-12 2:34 UTC (permalink / raw) To: Ravikiran G Thirumalai Cc: Andi Kleen, Rohit Seth, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Fri, 9 Dec 2005, Ravikiran G Thirumalai wrote: > > For the BP case it's ok as > > long as the beginning is correctly aligned and the rest > > is read-only. > > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > which is read mostly (as Nippun pointed out). So could we keep the padding > as it is for the BP too? But how often is this occuring? I presume this is for the virtualisation case only? Thanks ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-12 2:34 ` Zwane Mwaikambo @ 2005-12-12 2:31 ` Andi Kleen 2005-12-12 2:38 ` Zwane Mwaikambo 1 sibling, 0 replies; 18+ messages in thread From: Andi Kleen @ 2005-12-12 2:31 UTC (permalink / raw) To: Zwane Mwaikambo Cc: Ravikiran G Thirumalai, Andi Kleen, Rohit Seth, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Sun, Dec 11, 2005 at 06:34:16PM -0800, Zwane Mwaikambo wrote: > On Fri, 9 Dec 2005, Ravikiran G Thirumalai wrote: > > > > For the BP case it's ok as > > > long as the beginning is correctly aligned and the rest > > > is read-only. > > > > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > > which is read mostly (as Nippun pointed out). So could we keep the padding > > as it is for the BP too? > > But how often is this occuring? I presume this is for the virtualisation GDT writes happen often if you use threads with TLS. > case only? In this case for programs running on ScaleX86's hypervisor yes. -Andi ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-12 2:34 ` Zwane Mwaikambo 2005-12-12 2:31 ` Andi Kleen @ 2005-12-12 2:38 ` Zwane Mwaikambo 1 sibling, 0 replies; 18+ messages in thread From: Zwane Mwaikambo @ 2005-12-12 2:38 UTC (permalink / raw) To: Ravikiran G Thirumalai Cc: Andi Kleen, Rohit Seth, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Sun, 11 Dec 2005, Zwane Mwaikambo wrote: > On Fri, 9 Dec 2005, Ravikiran G Thirumalai wrote: > > > > For the BP case it's ok as > > > long as the beginning is correctly aligned and the rest > > > is read-only. > > > > Just that any writes on the bp GDT will invalidate the idt_table cacheline, > > which is read mostly (as Nippun pointed out). So could we keep the padding > > as it is for the BP too? > > But how often is this occuring? I presume this is for the virtualisation > case only? Ignore this it's a misfire from an old postponed message (i saw the reply by Andi). ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary 2005-12-08 23:45 ` Rohit Seth 2005-12-08 23:43 ` Andi Kleen @ 2005-12-08 23:50 ` Rohit Seth 1 sibling, 0 replies; 18+ messages in thread From: Rohit Seth @ 2005-12-08 23:50 UTC (permalink / raw) To: Andi Kleen Cc: Ravikiran G Thirumalai, Andrew Morton, linux-kernel, discuss, zach, shai, nippung On Thu, 2005-12-08 at 15:45 -0800, Rohit Seth wrote: > > > Agreed. > > Though do we need to have full page allocated for each gdt (256 bytes) > then? ...possibly use kmalloc. > sorry, forgot about the false sharing part. -rohit ^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2005-12-12 2:32 UTC | newest] Thread overview: 18+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2005-12-08 19:42 [patch] x86_64: align and pad x86_64 GDT on page boundary Ravikiran G Thirumalai 2005-12-08 20:15 ` [discuss] " Andi Kleen 2005-12-08 21:55 ` Ravikiran G Thirumalai 2005-12-08 23:09 ` Rohit Seth 2005-12-08 23:11 ` Andi Kleen 2005-12-08 23:26 ` Rohit Seth 2005-12-08 23:26 ` Andi Kleen 2005-12-08 23:45 ` Rohit Seth 2005-12-08 23:43 ` Andi Kleen 2005-12-09 22:19 ` Ravikiran G Thirumalai 2005-12-09 22:57 ` Andi Kleen 2005-12-09 23:01 ` Rohit Seth 2005-12-09 22:59 ` Andi Kleen 2005-12-09 23:46 ` Ravikiran G Thirumalai 2005-12-12 2:34 ` Zwane Mwaikambo 2005-12-12 2:31 ` Andi Kleen 2005-12-12 2:38 ` Zwane Mwaikambo 2005-12-08 23:50 ` Rohit Seth
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox