* [patch] tls-2.5.30-A1
@ 2002-08-07 18:10 Ingo Molnar
2002-08-07 18:33 ` Linus Torvalds
` (2 more replies)
0 siblings, 3 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-07 18:10 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard
[-- Attachment #1: Type: TEXT/PLAIN, Size: 5643 bytes --]
the attached patch (against BK-curr + Luca Barbieri's two TLS patches)
does two things:
- it implements a second TLS entry for Wine's purposes.
Alexandre suggested that Wine would need two TLS entries, one for glibc
(in %gs), and one for the Win32 API (in %fs). The constant selector is
also a speedup for switches to/from 16-bit mode.
i left the possibility open to add even more TLS entries, but i find it
very unlikely to happen. So the code does not iterate over an array of TLS
descriptors, for performance reasons. This can be changed anytime without
affecting the userspace interface.
- the patch adds the get_thread_area() system-call.
the get_thread_area() call is needed by debuggers, to be able to read the
TLS settings of a threaded application, without having to assume anything
about what was loaded. The get_thread_area() call does not expose any
segmentation details - it returns the TLS info in the same format as
passed to the set_thread_area() call.
i've also attached tls.c which shows off both extensions. These extensions
are source and binary-compatible with any potential TLS code.
Ingo
--- linux/arch/i386/kernel/process.c.orig Wed Aug 7 19:16:45 2002
+++ linux/arch/i386/kernel/process.c Wed Aug 7 19:40:27 2002
@@ -839,6 +839,7 @@
asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
{
struct thread_struct *t = &current->thread;
+ struct desc_struct *desc;
int writable = 0;
int cpu;
@@ -848,21 +849,62 @@
if (flags & TLS_FLAG_WRITABLE)
writable = 1;
+ desc = &t->tls_desc1;
+ if (flags & TLS_FLAG_ENTRY2)
+ desc = &t->tls_desc2;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
+ desc->a = ((base & 0x0000ffff) << 16) | 0xffff;
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
+ desc->b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
0xf0000 | (writable << 9) | (1 << 15) |
(1 << 22) | (1 << 23) | 0x7000;
load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ if (flags & TLS_FLAG_ENTRY2)
+ return TLS_ENTRY2*8 + 3;
+ else
+ return TLS_ENTRY1*8 + 3;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) \
+( (((desc).a >> 16) & 0x0000ffff) | \
+ (((desc).b << 16) & 0x00ff0000) | \
+ ( (desc).b & 0xff000000) )
+
+#define GET_WRITABLE(desc) \
+ (((desc).b >> 9) & 0x00000001)
+
+asmlinkage int sys_get_thread_area(unsigned long *ubase, unsigned long *uflags,
+ unsigned long flags)
+{
+ struct thread_struct *thread = &current->thread;
+ unsigned long base, flg;
+
+ if (flags & ~TLS_FLAGS_MASK)
+ return -EINVAL;
+
+ if (flags & TLS_FLAG_ENTRY2) {
+ base = GET_BASE(thread->tls_desc2);
+ flg = GET_WRITABLE(thread->tls_desc2) | TLS_FLAG_ENTRY2;
+ } else {
+ base = GET_BASE(thread->tls_desc1);
+ flg = GET_WRITABLE(thread->tls_desc1) | TLS_FLAG_ENTRY1;
+ }
+ if (copy_to_user(ubase, &base, sizeof(base)))
+ return -EFAULT;
+ if (copy_to_user(uflags, &flg, sizeof(flg)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/entry.S.orig Wed Aug 7 19:18:33 2002
+++ linux/arch/i386/kernel/entry.S Wed Aug 7 19:18:21 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/include/asm-i386/processor.h.orig Wed Aug 7 19:22:57 2002
+++ linux/include/asm-i386/processor.h Wed Aug 7 19:27:01 2002
@@ -376,8 +376,8 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
+/* TLS cached descriptors */
+ struct desc_struct tls_desc1, tls_desc2;
};
#define INIT_THREAD { \
--- linux/include/asm-i386/unistd.h.orig Wed Aug 7 19:18:45 2002
+++ linux/include/asm-i386/unistd.h Wed Aug 7 19:18:58 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
--- linux/include/asm-i386/desc.h.orig Wed Aug 7 19:20:57 2002
+++ linux/include/asm-i386/desc.h Wed Aug 7 19:51:13 2002
@@ -12,7 +12,7 @@
* 3 - kernel data segment
* 4 - user code segment <==== new cacheline
* 5 - user data segment
- * 6 - Thread-Local Storage (TLS) segment
+ * 6 - Thread-Local Storage (TLS) segment #1
* 7 - LDT
* 8 - APM BIOS support <==== new cacheline
* 9 - APM BIOS support
@@ -23,12 +23,13 @@
* 14 - PNPBIOS support
* 15 - PNPBIOS support
* 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
+ * 17 - TLS segment #2
* 18 - not used
* 19 - not used
*/
#define TSS_ENTRY 1
-#define TLS_ENTRY 6
+#define TLS_ENTRY1 6
+#define TLS_ENTRY2 17
#define LDT_ENTRY 7
/*
* The interrupt descriptor table has room for 256 idt's,
@@ -86,13 +87,16 @@
_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define TLS_FLAGS_MASK 0x00000003
#define TLS_FLAG_WRITABLE 0x00000001
+#define TLS_FLAG_ENTRY1 0x00000000
+#define TLS_FLAG_ENTRY2 0x00000002
static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+ cpu_gdt_table[cpu][TLS_ENTRY1] = t->tls_desc1;
+ cpu_gdt_table[cpu][TLS_ENTRY2] = t->tls_desc2;
}
static inline void clear_LDT(void)
[-- Attachment #2: Type: TEXT/PLAIN, Size: 3698 bytes --]
#include <asm/ldt.h>
#include <stdio.h>
#include <linux/unistd.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <asm/sigcontext.h>
/*
* TLS functionality testing utility.
*/
/*
 * Flag bits passed to set_thread_area()/get_thread_area().  The values
 * are copied from the kernel patch this utility accompanies:
 * bit 0 requests a writable segment, bit 1 selects the second TLS entry
 * instead of the first.
 */
#define TLS_FLAGS_MASK 0x00000003
#define TLS_FLAG_WRITABLE 0x00000001
#define TLS_FLAG_ENTRY2 0x00000002
/*
 * Syscall numbers are defined locally, using the same numbers the patch
 * adds to asm-i386/unistd.h.  The _syscallN() macros are presumably the
 * ones provided by <linux/unistd.h> (included above), which generate the
 * user-space stubs set_thread_area() and get_thread_area() called from
 * main().
 */
#define __NR_set_thread_area 243
_syscall2(int, set_thread_area, unsigned int, base, unsigned int, flags)
#define __NR_get_thread_area 244
_syscall3(int, get_thread_area, unsigned int *, ubase, unsigned int *, uflags, unsigned int, flags)
/*
 * Load the segment selector 'seg' into the %fs segment register.
 * All later __readseg()/__writeseg() accesses go through %fs.
 */
static inline void initseg (int seg)
{
	__asm__ __volatile__ ("mov %w[sel],%%fs"
			      : /* no outputs */
			      : [sel] "r" (seg));
}
/*
 * Read one byte from %fs:offset and return it.
 *
 * The asm is volatile and clobbers "memory" because its result depends
 * on state the compiler cannot see (the current %fs selector and the
 * bytes behind it).  Without that, two reads of the same offset could be
 * merged into one, or a read could be moved across an earlier store to
 * the test buffer or across a __writeseg() call.
 */
static inline unsigned char __readseg (unsigned offset)
{
	unsigned char res;

	__asm__ __volatile__ ("fs; movb (%1),%%al"
			      : "=a" (res)
			      : "r" (offset)
			      : "memory");
	return res;
}
/*
 * Store the byte 'b' at %fs:offset.
 *
 * The byte operand uses the "q" constraint: the template prints it with
 * the %b (byte register) modifier, and on i386 only %eax/%ebx/%ecx/%edx
 * have a byte form.  A plain "r" may pick %esi or %edi and fail to
 * assemble.  The "memory" clobber tells the compiler that memory is
 * modified behind its back, so later reads are not served from stale
 * cached values.
 */
static inline void __writeseg (unsigned offset, unsigned char b)
{
	__asm__ __volatile__ ("fs; movb %b1,(%0)"
			      : /* no outputs */
			      : "r" (offset), "q" (b)
			      : "memory");
}
/*
 * Fetch the byte at %fs:src and store it in *dst.
 * 'src' is not dereferenced directly; its value is used as the offset
 * within the segment currently loaded in %fs.
 */
static void readseg (void *dst, const void *src)
{
	char *out = (char *)dst;
	unsigned int offset = (unsigned int)src;

	*out = __readseg(offset);
}
/*
 * Store 'value' at %fs:dst.
 * 'dst' is not dereferenced directly; its value is used as the offset
 * within the segment currently loaded in %fs.
 */
static void writeseg (void *dst, unsigned char value)
{
	unsigned int offset = (unsigned int)dst;

	__writeseg(offset, value);
}
/*
 * Test buffers.  'data' is the area the second TLS entry is pointed at.
 * It is 4097 bytes long so that offset 4096 (the "last byte of 4097 byte
 * TLS" exercised below) is a valid element of the array instead of a
 * write one past its end.  pre_data and post_data are never referenced;
 * they look like padding around 'data' - their placement relative to
 * 'data' is up to the linker.
 */
unsigned char pre_data [4096] = { [ 0 ... 4095 ] = 33 };
unsigned char data [4097] = { [ 0 ... 4096 ] = 44 };
unsigned char post_data [4096] = { [ 0 ... 4095 ] = 55 };

/*
 * Exercise set_thread_area()/get_thread_area():
 *  1. install TLS entry 1 with base 0 and read data[0] through %fs;
 *  2. install TLS entry 2 with base &data, then read and write bytes
 *     through %fs after a context switch;
 *  3. read the entry-2 settings back and compare them with what was set.
 *
 * Exits with -1 if a selector is not the expected one, otherwise
 * returns 0 (individual read/write failures are only reported).
 */
int main (void)
{
	unsigned int base, flags, want_base, want_flags;
	int seg, ret;
	unsigned char result;

	data[0] = 123;
	data[4096] = 210;

	base = 0;
	printf("\ndoing set_thread_area(0x%08x, writable):\n", base);
	seg = set_thread_area(base, TLS_FLAG_WRITABLE);
	printf("====> got GDT selector: 0x%x", seg);
	/* 51 == GDT entry 6, RPL 3: the selector the patch returns for entry 1 */
	if (seg != 51) {
		printf(" ERROR: incorrect selector!\n");
		exit(-1);
	}
	printf(" --- TEST PASSED.\n");
	initseg(seg);

	printf("\nreading first byte of [0x%08x] TLS:\n", base);
	/* base is 0, so the offset is simply the address of data[0] */
	readseg (&result, &data);
	if (result == 123)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	base = (unsigned int)&data;
	printf("doing set_thread_area(0x%08x, writable, entry2):\n", base);
	seg = set_thread_area(base, TLS_FLAG_WRITABLE | TLS_FLAG_ENTRY2);
	printf("====> got GDT selector: 0x%x", seg);
	/* 0x8b == GDT entry 17, RPL 3: the selector the patch returns for entry 2 */
	if (seg != 0x8b) {
		printf(" ERROR: incorrect selector!\n");
		exit(-1);
	}
	printf(" --- TEST PASSED.\n");
	/*
	 * Load %fs only after the selector has been validated: a failed
	 * syscall returns a value that must not be loaded as a selector.
	 */
	initseg(seg);

	printf("context-switching once ...\n");
	sleep(1);

	printf("\nreading first byte of 4K [0x%08x] TLS:\n", base);
	readseg (&result, 0);
	if (result == 123)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	printf("reading last byte of 4097 byte [0x%08x] TLS:\n", base);
	readseg (&result, (void *)4096);
	if (result == 210)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	printf("writing last byte of 4097 byte [0x%08x] TLS:\n", base);
	writeseg ((void *)4096, 234);
	readseg (&result, (void *)4096);
	if (result == 234)
		printf("====> %d --- TEST PASSED.\n", result);
	else
		printf("====> %d --- TEST FAILURE!.\n", result);

	printf("\nreading byte outside of the TLS (should not coredump)...\n\n");
	readseg (&result, (void *)4097);
	printf("result: %d.\n", result);

	printf("doing get_thread_area(0x%08x, writable, entry2):\n", base);
	/* remember what was installed, then poison the output variables */
	want_base = base;
	want_flags = TLS_FLAG_WRITABLE | TLS_FLAG_ENTRY2;
	base = flags = 1234;
	ret = get_thread_area(&base, &flags, TLS_FLAG_WRITABLE | TLS_FLAG_ENTRY2);
	/* success requires both a zero return and the values set earlier */
	if (!ret && base == want_base && flags == want_flags)
		printf("====> [%08x, %d] %d --- TEST PASSED.\n", base, (int)flags, ret);
	else
		printf("====> [%08x, %d] %d --- TEST FAILURE!.\n", base, (int)flags, ret);

	return 0;
}
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [patch] tls-2.5.30-A1 2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar @ 2002-08-07 18:33 ` Linus Torvalds 2002-08-07 18:43 ` Stephen Rothwell ` (4 more replies) 2002-08-07 19:02 ` [patch] tls-2.5.30-A1 Christoph Hellwig 2002-08-08 12:25 ` Jamie Lokier 2 siblings, 5 replies; 47+ messages in thread From: Linus Torvalds @ 2002-08-07 18:33 UTC (permalink / raw) To: Ingo Molnar; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri On Wed, 7 Aug 2002, Ingo Molnar wrote: > > the attached patch (against BK-curr + Luca Barbieri's two TLS patches) > does two things: > > - it implements a second TLS entry for Wine's purposes. Guys, I really don't like how the segment map ends up getting uglier and uglier. I would suggest: - move all kernel-related (and thus non-visible to user space) segments up, and make the cacheline optimizations _there_. - keep the TLS entries contiguous, and make sure that segment 0040 (ie GDT entry #8) is available to a TLS entry, since if I remember correctly, that one is also magical for old Windows binaries for all the wrong reasons (ie it was some system data area in DOS and in Windows 3.1) - and for cleanliness bonus points: make the regular user data segments just another TLS segment that just happens to have default values. If the user wants to screw with its own segments, let it. Then, for double extra bonus points somebody should look into whether those damn PnP BIOS segments could be simply made to be TLS segments during module init. I don't know if that PnP stuff is required later or not. Linus ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:33 ` Linus Torvalds @ 2002-08-07 18:43 ` Stephen Rothwell 2002-08-07 18:57 ` Linus Torvalds 2002-08-07 19:31 ` Ingo Molnar ` (3 subsequent siblings) 4 siblings, 1 reply; 47+ messages in thread From: Stephen Rothwell @ 2002-08-07 18:43 UTC (permalink / raw) To: Linus Torvalds; +Cc: mingo, linux-kernel, julliard, ldb On Wed, 7 Aug 2002 11:33:23 -0700 (PDT) Linus Torvalds <torvalds@transmeta.com> wrote: > > - keep the TLS entries contiguous, and make sure that segment 0040 (ie > GDT entry #8) is available to a TLS entry, since if I remember > correctly, that one is also magical for old Windows binaries for all > the wrong reasons (ie it was some system data area in DOS and in > Windows 3.1) segment 0040 is used by the APM driver to work around bugs in some BIOS implementations where some (brain-dead) BIOS writer has assume that the BIOS data area is still available in protected mode ... -- Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:43 ` Stephen Rothwell @ 2002-08-07 18:57 ` Linus Torvalds 2002-08-07 19:40 ` Alexandre Julliard 0 siblings, 1 reply; 47+ messages in thread From: Linus Torvalds @ 2002-08-07 18:57 UTC (permalink / raw) To: Stephen Rothwell; +Cc: mingo, linux-kernel, julliard, ldb On Thu, 8 Aug 2002, Stephen Rothwell wrote: > On Wed, 7 Aug 2002 11:33:23 -0700 (PDT) Linus Torvalds <torvalds@transmeta.com> wrote: > > > > - keep the TLS entries contiguous, and make sure that segment 0040 (ie > > GDT entry #8) is available to a TLS entry, since if I remember > > correctly, that one is also magical for old Windows binaries for all > > the wrong reasons (ie it was some system data area in DOS and in > > Windows 3.1) > > segment 0040 is used by the APM driver to work around bugs in some BIOS > implementations where some (brain-dead) BIOS writer has assume that the > BIOS data area is still available in protected mode ... Ok, sounds like that one ends up having to be a fixed segment (I wonder if Wine can take advantage of it? looks like it is hardcoded to base 0x400, which is probably fine for Wine anyway - just map something at the right address - but it looks CPL0 only? Might be ok to just make it available to user space). Linus ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:57 ` Linus Torvalds @ 2002-08-07 19:40 ` Alexandre Julliard 0 siblings, 0 replies; 47+ messages in thread From: Alexandre Julliard @ 2002-08-07 19:40 UTC (permalink / raw) To: Linus Torvalds; +Cc: Stephen Rothwell, mingo, linux-kernel, ldb Linus Torvalds <torvalds@transmeta.com> writes: > Ok, sounds like that one ends up having to be a fixed segment (I wonder if > Wine can take advantage of it? looks like it is hardcoded to base 0x400, > which is probably fine for Wine anyway - just map something at the right > address - but it looks CPL0 only? Might be ok to just make it available to > user space). Base 0x400 should work just fine for Wine, we already need to have the BIOS data mapped there anyway, so simply making the selector available to user space would work completely transparently for us. We are currently trapping and emulating accesses to that selector so it doesn't matter much whether it is protected or not, except for a small performance gain. What would break Wine is if that selector was made accessible to user space with a different base address, so this should be avoided. -- Alexandre Julliard julliard@winehq.com ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:33 ` Linus Torvalds 2002-08-07 18:43 ` Stephen Rothwell @ 2002-08-07 19:31 ` Ingo Molnar 2002-08-07 19:49 ` Alexandre Julliard 2002-08-07 22:01 ` Alan Cox ` (2 subsequent siblings) 4 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-07 19:31 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri On Wed, 7 Aug 2002, Linus Torvalds wrote: > I would suggest: > - move all kernel-related (and thus non-visible to user space) segments > up, and make the cacheline optimizations _there_. > - keep the TLS entries contiguous, and make sure that segment 0040 (ie > GDT entry #8) is available to a TLS entry, since if I remember > correctly, that one is also magical for old Windows binaries for all > the wrong reasons (ie it was some system data area in DOS and in > Windows 3.1) > - and for cleanliness bonus points: make the regular user data segments > just another TLS segment that just happens to have default values. If > the user wants to screw with its own segments, let it. i'll do this. Julliard, any additional suggestions perhaps - is GDT entry 8 the best %fs choice for Wine? Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 19:31 ` Ingo Molnar @ 2002-08-07 19:49 ` Alexandre Julliard 0 siblings, 0 replies; 47+ messages in thread From: Alexandre Julliard @ 2002-08-07 19:49 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Luca Barbieri Ingo Molnar <mingo@elte.hu> writes: > i'll do this. Julliard, any additional suggestions perhaps - is GDT entry > 8 the best %fs choice for Wine? No, this one is special and has to point to 0x400, so it's actually the only one that wouldn't work to use as %fs in Wine. -- Alexandre Julliard julliard@winehq.com ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:33 ` Linus Torvalds 2002-08-07 18:43 ` Stephen Rothwell 2002-08-07 19:31 ` Ingo Molnar @ 2002-08-07 22:01 ` Alan Cox 2002-08-07 22:36 ` Luca Barbieri 2002-08-11 21:46 ` [patch] tls-2.5.31-C3 Ingo Molnar 4 siblings, 0 replies; 47+ messages in thread From: Alan Cox @ 2002-08-07 22:01 UTC (permalink / raw) To: Linus Torvalds Cc: Ingo Molnar, linux-kernel, Alexandre Julliard, Luca Barbieri On Wed, 2002-08-07 at 19:33, Linus Torvalds wrote: > - keep the TLS entries contiguous, and make sure that segment 0040 (ie > GDT entry #8) is available to a TLS entry, since if I remember > correctly, that one is also magical for old Windows binaries for all > the wrong reasons (ie it was some system data area in DOS and in > Windows 3.1) Lots of BIOSes (a million monkeys bashing on typewriters will write something that passes some BIOS vendor QA in about 2 seconds) illegally assume that 0040: points at the BIOS data segment 0040 when making APM32 calls. Sufficient that Windows makea it so and its never going to get corrected. > Then, for double extra bonus points somebody should look into whether > those damn PnP BIOS segments could be simply made to be TLS segments > during module init. I don't know if that PnP stuff is required later or > not. PnPBIOS has to rewrite segments as it goes for data passing. It doesnt really matter where you stuff them though. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:33 ` Linus Torvalds ` (2 preceding siblings ...) 2002-08-07 22:01 ` Alan Cox @ 2002-08-07 22:36 ` Luca Barbieri 2002-08-07 22:54 ` Ingo Molnar 2002-08-11 21:46 ` [patch] tls-2.5.31-C3 Ingo Molnar 4 siblings, 1 reply; 47+ messages in thread From: Luca Barbieri @ 2002-08-07 22:36 UTC (permalink / raw) To: Linus Torvalds; +Cc: Ingo Molnar, Linux-Kernel ML [-- Attachment #1: Type: text/plain, Size: 12054 bytes --] On Wed, 2002-08-07 at 20:33, Linus Torvalds wrote: > > On Wed, 7 Aug 2002, Ingo Molnar wrote: > > > > the attached patch (against BK-curr + Luca Barbieri's two TLS patches) > > does two things: > > > > - it implements a second TLS entry for Wine's purposes. > > Guys, I really don't like how the segment map ends up getting uglier and > uglier. > > I would suggest: > - move all kernel-related (and thus non-visible to user space) segments > up, and make the cacheline optimizations _there_. Done. > - keep the TLS entries contiguous, and make sure that segment 0040 (ie > GDT entry #8) is available to a TLS entry, since if I remember > correctly, that one is also magical for old Windows binaries for all > the wrong reasons (ie it was some system data area in DOS and in > Windows 3.1) Done. Segment 0x40 set to CPL 3. > - and for cleanliness bonus points: make the regular user data segments > just another TLS segment that just happens to have default values. If > the user wants to screw with its own segments, let it. Bad idea: makes task switch slower without any practical advantage. The user may load a TLS or LDT selector in %ds to get the same effect. > Then, for double extra bonus points somebody should look into whether > those damn PnP BIOS segments could be simply made to be TLS segments > during module init. I don't know if that PnP stuff is required later or > not. Not sure what you mean. 
The current definition of TLS segments is "a minimal number of GDT entries that are modified on task switch and that can be set on a per-task basis so that the selectors can be loaded %fs and %gs". How can kernel PNPBIOS segments fit in this definition? The patch changes the descriptior layout so that LDT is in the kernel segment cacheline, the 16-bit APM segments are together and user segments are together. It also sets segment 0x40 CPL to 3. __BOOT_CS and __BOOT_DS are introduced as the value of segment selectors during boot (so that we don't have to enlarge the gdt in setup.s). New layout: * 0 - null * 1 - PNPBIOS support (16->32 gate) * 2 - boot code segment * 3 - boot data segment * 4 - PNPBIOS support <==== new cacheline * 5 - PNPBIOS support * 6 - PNPBIOS support * 7 - PNPBIOS support * 8 - APM BIOS support (0x400-0x1000)<==== new cacheline * 9 - APM BIOS support * 10 - APM BIOS support * 11 - APM BIOS support * 12 - kernel code segment <==== new cacheline * 13 - kernel data segment * 14 - TSS * 15 - LDT * ------- start of user segments * 16 - user code segment <==== new cacheline * 17 - user data segment * 18 - Thread-Local Storage (TLS) segment #1 * 19 - Thread-Local Storage (TLS) segment #2 diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S --- a/arch/i386/boot/compressed/head.S 2002-07-20 21:12:21.000000000 +0200 +++ b/arch/i386/boot/compressed/head.S 2002-08-08 00:14:45.000000000 +0200 @@ -31,7 +31,7 @@ startup_32: cld cli - movl $(__KERNEL_DS),%eax + movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es movl %eax,%fs @@ -74,7 +74,7 @@ popl %esi # discard address popl %esi # real mode pointer xorl %ebx,%ebx - ljmp $(__KERNEL_CS), $0x100000 + ljmp $(__BOOT_CS), $0x100000 /* * We come here, if we were loaded high. 
@@ -101,7 +101,7 @@ popl %eax # hcount movl $0x100000,%edi cli # make sure we don't get interrupted - ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine + ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine /* * Routine (template) for moving the decompressed kernel in place, @@ -124,5 +124,5 @@ movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__KERNEL_CS), $0x100000 + ljmp $(__BOOT_CS), $0x100000 move_routine_end: diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c --- a/arch/i386/boot/compressed/misc.c 2002-07-20 21:11:24.000000000 +0200 +++ b/arch/i386/boot/compressed/misc.c 2002-08-07 23:48:58.000000000 +0200 @@ -299,7 +299,7 @@ struct { long * a; short b; - } stack_start = { & user_stack [STACK_SIZE] , __KERNEL_DS }; + } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS }; static void setup_normal_output_buffer(void) { diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S --- a/arch/i386/boot/setup.S 2002-07-20 21:11:05.000000000 +0200 +++ b/arch/i386/boot/setup.S 2002-08-08 00:14:30.000000000 +0200 @@ -801,7 +801,7 @@ subw $DELTA_INITSEG, %si shll $4, %esi # Convert to 32-bit pointer # NOTE: For high loaded big kernels we need a -# jmpi 0x100000,__KERNEL_CS +# jmpi 0x100000,__BOOT_CS # # but we yet haven't reloaded the CS register, so the default size # of the target offset still is 16 bit. @@ -812,7 +812,7 @@ .byte 0x66, 0xea # prefix + jmpi-opcode code32: .long 0x1000 # will be set to 0x100000 # for big kernels - .word __KERNEL_CS + .word __BOOT_CS # Here's a bunch of information about your current kernel.. 
kernel_version: .ascii UTS_RELEASE diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S --- a/arch/i386/kernel/head.S 2002-08-07 19:03:24.000000000 +0200 +++ b/arch/i386/kernel/head.S 2002-08-08 00:08:48.000000000 +0200 @@ -46,7 +46,7 @@ * Set segments to known values */ cld - movl $(__KERNEL_DS),%eax + movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es movl %eax,%fs @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -311,7 +306,7 @@ ENTRY(stack_start) .long init_thread_union+8192 - .long __KERNEL_DS + .long __BOOT_DS /* This is the default interrupt "handler" :-) */ int_msg: @@ -415,31 +410,30 @@ * The Global Descriptor Table contains 20 quadwords, per-CPU. */ ENTRY(cpu_gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x0000000000000000 /* 0x00 NULL descriptor */ + .quad 0x00c09a0000000000 /* 0x08 PNPBIOS 32-bit code */ + .quad 0x00cf9a000000ffff /* 0x10 boot 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x18 boot 4GB data at 0x00000000 */ + .quad 0x00809a0000000000 /* 0x20 PNPBIOS 16-bit code */ + .quad 0x0080920000000000 /* 0x28 PNPBIOS 16-bit data */ + .quad 0x0080920000000000 /* 0x30 PNPBIOS 16-bit data */ + .quad 0x0080920000000000 /* 0x38 PNPBIOS 16-bit data */ /* * The APM segments have byte 
granularity and their bases * and limits are set at run time. */ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ + .quad 0x0040f20000000000 /* 0x40 APM set up for bad BIOS's */ .quad 0x00409a0000000000 /* 0x48 APM CS code */ .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ .quad 0x0040920000000000 /* 0x58 APM DS data */ - /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + .quad 0x00cffa000000ffff /* 0x80 user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x88 user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x90 TLS1 descriptor */ + .quad 0x0000000000000000 /* 0x98 TLS2 descriptor */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/drivers/pnp/pnpbios_core.c b/drivers/pnp/pnpbios_core.c --- a/drivers/pnp/pnpbios_core.c 2002-08-02 01:19:05.000000000 +0200 +++ b/drivers/pnp/pnpbios_core.c 2002-08-08 00:03:13.000000000 +0200 @@ -90,12 +90,13 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) -#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ -#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ -#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ -#define PNP_TS1 (PNP_GDT+0x18) /* transfer data segment */ -#define PNP_TS2 
(PNP_GDT+0x20) /* another data segment */ +#define PNP_CS32 (0x08) /* segment for calling fn */ + +#define PNP_GDT (0x20) +#define PNP_CS16 (PNP_GDT+0x00) /* code segment for BIOS */ +#define PNP_DS (PNP_GDT+0x08) /* data segment for BIOS */ +#define PNP_TS1 (PNP_GDT+0x10) /* transfer data segment */ +#define PNP_TS2 (PNP_GDT+0x18) /* another data segment */ /* * These are some opcodes for a "static asmlinkage" diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/include/asm-i386/desc.h b/include/asm-i386/desc.h --- a/include/asm-i386/desc.h 2002-08-07 21:27:54.000000000 +0200 +++ b/include/asm-i386/desc.h 2002-08-08 00:12:01.000000000 +0200 @@ -7,30 +7,31 @@ * The layout of the per-CPU GDT under Linux: * * 0 - null - * 1 - TSS - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - Thread-Local Storage (TLS) segment #1 - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline + * 1 - PNPBIOS support (16->32 gate) + * 2 - boot code segment + * 3 - boot data segment + * 4 - PNPBIOS support <==== new cacheline + * 5 - PNPBIOS support + * 6 - PNPBIOS support + * 7 - PNPBIOS support + * 8 - APM BIOS support (0x400-0x1000)<==== new cacheline * 9 - APM BIOS support * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - TLS segment #2 - * 18 - not used - * 19 - not used + * 11 - APM BIOS support + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * ------- start of user segments + * 16 - user code segment <==== new cacheline + * 17 - user data segment + * 18 - Thread-Local Storage (TLS) segment #1 + * 19 - Thread-Local Storage (TLS) segment #2 */ -#define TSS_ENTRY 1 -#define TLS_ENTRY1 6 -#define TLS_ENTRY2 17 -#define LDT_ENTRY 7 +#define TSS_ENTRY 14 +#define LDT_ENTRY 
15 +#define TLS_ENTRY1 18 +#define TLS_ENTRY2 19 /* * The interrupt descriptor table has room for 256 idt's, * the global descriptor table is dependent on the number diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/include/asm-i386/segment.h b/include/asm-i386/segment.h --- a/include/asm-i386/segment.h 2002-07-20 21:11:11.000000000 +0200 +++ b/include/asm-i386/segment.h 2002-08-07 23:50:08.000000000 +0200 @@ -1,10 +1,13 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +#define __BOOT_CS 0x10 +#define __BOOT_DS 0x18 -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define __KERNEL_CS 0x60 +#define __KERNEL_DS 0x68 + +#define __USER_CS 0x83 +#define __USER_DS 0x8B #endif [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 22:36 ` Luca Barbieri @ 2002-08-07 22:54 ` Ingo Molnar 2002-08-07 23:21 ` Luca Barbieri 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-07 22:54 UTC (permalink / raw) To: Luca Barbieri; +Cc: Linus Torvalds, Linux-Kernel ML On 8 Aug 2002, Luca Barbieri wrote: > > I would suggest: > > - move all kernel-related (and thus non-visible to user space) segments > > up, and make the cacheline optimizations _there_. > Done. > > - keep the TLS entries contiguous, and make sure that segment 0040 (ie > > GDT entry #8) is available to a TLS entry, since if I remember > > correctly, that one is also magical for old Windows binaries for all > > the wrong reasons (ie it was some system data area in DOS and in > > Windows 3.1) > Done. Segment 0x40 set to CPL 3. > > - and for cleanliness bonus points: make the regular user data segments > > just another TLS segment that just happens to have default values. If > > the user wants to screw with its own segments, let it. > Bad idea: makes task switch slower without any practical advantage. > The user may load a TLS or LDT selector in %ds to get the same effect. your patch looks good to me - as long as we want to keep those 2 TLS entries and nothing more. (which i believe we want.) If even more TLS entries are to be made possible then a cleaner TLS enumeration interface has to be used like Christoph mentioned - although i dont think we really want that, 3 or more entries would be a stretch i think. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 22:54 ` Ingo Molnar @ 2002-08-07 23:21 ` Luca Barbieri 2002-08-07 23:35 ` DMA Problems with Intel 845 Chipset and Northwood CPU Mark Cuss 0 siblings, 1 reply; 47+ messages in thread From: Luca Barbieri @ 2002-08-07 23:21 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Christoph Hellwig [-- Attachment #1: Type: text/plain, Size: 2234 bytes --] > your patch looks good to me - as long as we want to keep those 2 TLS > entries and nothing more. (which i believe we want.) If even more TLS > entries are to be made possible then a cleaner TLS enumeration interface > has to be used like Christoph mentioned - although i dont think we really > want that, 3 or more entries would be a stretch i think. I think that 2 are enough. Flat 32-bit programs set ds=es=ss=__USER_DS and cs=__USER_CS so they only have fs and gs left. 16-bit programs and other odd ones can use the LDT support. As for the interface I would suggest replacing the current one with a single interface for LDT and GDT modifications that would provide the following parameters: unsigned table - LDT - GDTAVAIL: GDT starting from first TLS - GDTABS: GDT starting from 0 - AUTO: starts with the 2 TLS entries and proceeds with LDT unsigned operation - set: copy to kernel space (enlarge table if necessary). If root, don't check validity for speed, otherwise check to ensure the user is not e.g. putting call gates to CPL 0 code. - set1: like set, but passes a single entry directly in the num and ptr parameters - get: copy from kernel space - free: free memory and lower limits. If entry = 0 and num = ~0, completely frees table. - map: only for LDT and for root, allows to directly point to a user memory range - movekern: when support for per-task GDT is implemented, this would allow to change the entries used for kernel entries. This would be implemented with per-CPU IDTs and maybe dynamically generated code. Useful for virtualization programs. 
unsigned entry - first entry affected. ~0 for first unused entry. unsigned num - number of entries affected void* ptr - pointer to read/write entries from (table and operations may be merged) Return value: first entry changed e.g. libpthread would use table = AUTO, operation = set1, entry = ~0. For the LDT things would be implemented as usual. For the GDT the initial implementation would just modify TLS entries. In future, support for dynamically allocated per-task GDTs could be added. I would implement this by adding ops to sys_modify_ldt. BTW, tls_desc1/tls_desc2 would IMHO be better as gdt_desc[2]. I don't plan to implement this myself. [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* DMA Problems with Intel 845 Chipset and Northwood CPU 2002-08-07 23:21 ` Luca Barbieri @ 2002-08-07 23:35 ` Mark Cuss 2002-08-08 0:58 ` John L. Korpi 0 siblings, 1 reply; 47+ messages in thread From: Mark Cuss @ 2002-08-07 23:35 UTC (permalink / raw) To: Linux-Kernel ML Hello all, Please accept my apologies if this question has already been answered.... I have a new Pentium 4 computer and I can't get the DMA working for hard disk transfers. Specifically, its a Dell Dimension 4500 with a Pentium 4 2.26 GHz processor (Northwood) and an 845 (Brookdale) chipset. RedHat 7.3.... I noticed that the hard disk transfers were very slow. I tried to set up DMA with hdparm - see below: [root@yoda ide]# hdparm /dev/hda /dev/hda: multcount = 16 (on) I/O support = 0 (default 16-bit) unmaskirq = 0 (off) using_dma = 0 (off) keepsettings = 0 (off) nowerr = 0 (off) readonly = 0 (off) readahead = 8 (on) geometry = 4866/255/63, sectors = 78177792, start = 0 busstate = 1 (on) [root@yoda ide]# hdparm -d1 /dev/hda /dev/hda: setting using_dma to 1 (on) HDIO_SET_DMA failed: Operation not permitted using_dma = 0 (off) [root@yoda ide]# I thought that perhaps the chipset had changed between this Northwood machine an the older core - I have a P4 1.8 GHz machine that DMA works fine on. I upgraded to kernel 2.4.19 with no change. The IDE controller (according to Windows XP....) is an Intel 82801DB Ultra ATA Storage Controller - 24CB. I've included the lspci --vvx listing from the problem machine below - my aplogies for the long list. If anyone has any suggestions I'd really appreciate them... Thanks.... Mark lspci listing: 00:00.0 Host bridge: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge (rev 11) Subsystem: Intel Corp. 
82845 845 (Brookdale) Chipset Host Bridge Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort+ >SERR- <PERR- Latency: 0 Region 0: Memory at f0000000 (32-bit, prefetchable) [size=128M] Capabilities: [e4] #09 [a104] Capabilities: [a0] AGP version 2.0 Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2 Command: RQ=0 SBA- AGP+ 64bit- FW- Rate=<none> 00: 86 80 30 1a 06 01 90 20 11 00 00 06 00 00 00 00 10: 08 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 30 1a 30: 00 00 00 00 e4 00 00 00 00 00 00 00 00 00 00 00 00:01.0 PCI bridge: Intel Corp. 82845 845 (Brookdale) Chipset AGP Bridge (rev 11) (prog-if 00 [Normal decode]) Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 64 Bus: primary=00, secondary=01, subordinate=01, sec-latency=64 Memory behind bridge: fc700000-fe7fffff Prefetchable memory behind bridge: dc300000-ec4fffff BridgeCtl: Parity- SERR+ NoISA- VGA+ MAbort- >Reset- FastB2B- 00: 86 80 31 1a 07 01 a0 00 11 00 04 06 00 40 01 00 10: 00 00 00 00 00 00 00 00 00 01 01 40 f0 00 a0 22 20: 70 fc 70 fe 30 dc 40 ec 00 00 00 00 00 00 00 00 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a 00 00:1d.0 USB Controller: Intel Corp.: Unknown device 24c2 (rev 01) (prog-if 00 [UHCI]) Subsystem: Dell Computer Corporation: Unknown device 0132 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 0 Interrupt: pin A routed to IRQ 11 Region 4: I/O ports at e800 [size=32] 00: 86 80 c2 24 05 00 80 02 01 00 03 0c 00 00 80 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 01 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01 30: 00 00 00 00 00 00 00 
00 00 00 00 00 0b 01 00 00 00:1d.1 USB Controller: Intel Corp.: Unknown device 24c4 (rev 01) (prog-if 00 [UHCI]) Subsystem: Dell Computer Corporation: Unknown device 0132 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 0 Interrupt: pin B routed to IRQ 5 Region 4: I/O ports at e880 [size=32] 00: 86 80 c4 24 05 00 80 02 01 00 03 0c 00 00 00 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 81 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01 30: 00 00 00 00 00 00 00 00 00 00 00 00 05 02 00 00 00:1d.2 USB Controller: Intel Corp.: Unknown device 24c7 (rev 01) (prog-if 00 [UHCI]) Subsystem: Dell Computer Corporation: Unknown device 0132 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 0 Interrupt: pin C routed to IRQ 9 Region 4: I/O ports at ec00 [size=32] 00: 86 80 c7 24 05 00 80 02 01 00 03 0c 00 00 00 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 01 ec 00 00 00 00 00 00 00 00 00 00 28 10 32 01 30: 00 00 00 00 00 00 00 00 00 00 00 00 09 03 00 00 00:1d.7 USB Controller: Intel Corp.: Unknown device 24cd (rev 01) (prog-if 20 [EHCI]) Subsystem: Dell Computer Corporation: Unknown device 0132 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 0 Interrupt: pin D routed to IRQ 10 Region 0: Memory at febffc00 (32-bit, non-prefetchable) [size=1K] Capabilities: [50] Power Management version 2 Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=0 PME- Capabilities: [58] #0a [2080] 00: 86 80 cd 24 06 01 90 02 01 20 03 0c 00 00 00 00 10: 00 fc bf fe 00 00 00 00 00 
00 00 00 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 28 10 32 01 30: 00 00 00 00 50 00 00 00 00 00 00 00 0a 04 00 00 00:1e.0 PCI bridge: Intel Corp. 82801BA/CA PCI Bridge (rev 81) (prog-if 00 [Normal decode]) Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR+ Latency: 0 Bus: primary=00, secondary=02, subordinate=02, sec-latency=32 I/O behind bridge: 0000d000-0000dfff Memory behind bridge: fe800000-feafffff Prefetchable memory behind bridge: ec500000-ec5fffff BridgeCtl: Parity- SERR+ NoISA+ VGA- MAbort- >Reset- FastB2B- 00: 86 80 4e 24 07 01 80 80 81 00 04 06 00 00 01 00 10: 00 00 00 00 00 00 00 00 00 02 02 20 d0 d0 80 22 20: 80 fe a0 fe 50 ec 50 ec 00 00 00 00 00 00 00 00 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 00 00:1f.0 ISA bridge: Intel Corp.: Unknown device 24c0 (rev 01) Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 0 00: 86 80 c0 24 0f 01 80 02 01 00 01 06 00 00 80 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00:1f.1 IDE interface: Intel Corp.: Unknown device 24cb (rev 01) (prog-if 8a [Master SecP PriP]) Subsystem: Dell Computer Corporation: Unknown device 0132 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 0 Interrupt: pin A routed to IRQ 9 Region 0: I/O ports at <unassigned> [size=8] Region 1: I/O ports at <unassigned> [size=4] Region 2: I/O ports at <unassigned> [size=8] Region 3: I/O ports at <unassigned> [size=4] Region 4: I/O ports at ffa0 [size=16] Region 5: Memory at 20000000 (32-bit, 
non-prefetchable) [disabled] [size=1K] 00: 86 80 cb 24 05 00 80 02 01 8a 01 01 00 00 00 00 10: 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00 20: a1 ff 00 00 00 00 00 20 00 00 00 00 28 10 32 01 30: 00 00 00 00 00 00 00 00 00 00 00 00 ff 01 00 00 00:1f.3 SMBus: Intel Corp.: Unknown device 24c3 (rev 01) Subsystem: Dell Computer Corporation: Unknown device 0132 Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Interrupt: pin B routed to IRQ 3 Region 4: I/O ports at e480 [size=32] 00: 86 80 c3 24 01 00 80 02 01 00 05 0c 00 00 00 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 81 e4 00 00 00 00 00 00 00 00 00 00 28 10 32 01 30: 00 00 00 00 00 00 00 00 00 00 00 00 03 02 00 00 01:00.0 VGA compatible controller: nVidia Corporation NV25 [GeForce4 Ti4200] (rev a3) (prog-if 00 [VGA]) Subsystem: nVidia Corporation: Unknown device 0132 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 248 (1250ns min, 250ns max) Interrupt: pin A routed to IRQ 11 Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M] Region 1: Memory at e0000000 (32-bit, prefetchable) [size=128M] Region 2: Memory at ec480000 (32-bit, prefetchable) [size=512K] Expansion ROM at fe7e0000 [disabled] [size=128K] Capabilities: [60] Power Management version 2 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME- Capabilities: [44] AGP version 2.0 Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2 Command: RQ=31 SBA- AGP+ 64bit- FW- Rate=<none> 00: de 10 53 02 07 00 b0 02 a3 00 00 03 00 f8 00 00 10: 00 00 00 fd 08 00 00 e0 08 00 48 ec 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 de 10 32 01 30: 00 00 00 00 60 00 00 00 00 00 00 00 0b 01 05 01 02:00.0 
Ethernet controller: Intel Corp. 82557/8/9 [Ethernet Pro 100] (rev 10) Subsystem: Intel Corp.: Unknown device 0071 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 64 (2000ns min, 14000ns max), cache line size 08 Interrupt: pin A routed to IRQ 11 Region 0: Memory at feaff000 (32-bit, non-prefetchable) [size=4K] Region 1: I/O ports at dc00 [size=64] Region 2: Memory at feac0000 (32-bit, non-prefetchable) [size=128K] Expansion ROM at feae0000 [disabled] [size=64K] Capabilities: [dc] Power Management version 2 Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=2 PME- 00: 86 80 29 12 17 01 90 02 10 00 00 02 08 40 00 00 10: 00 f0 af fe 01 dc 00 00 00 00 ac fe 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 71 00 30: 00 00 ae fe dc 00 00 00 00 00 00 00 0b 01 08 38 02:02.0 Multimedia audio controller: Cirrus Logic CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator] (rev 01) Subsystem: Voyetra Technologies: Unknown device 3357 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort- >SERR- <PERR- Latency: 64 (1000ns min, 6000ns max) Interrupt: pin A routed to IRQ 9 Region 0: Memory at feafd000 (32-bit, non-prefetchable) [size=4K] Region 1: Memory at fe900000 (32-bit, non-prefetchable) [size=1M] Capabilities: [40] Power Management version 2 Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME- 00: 13 10 03 60 06 01 10 04 01 00 01 04 00 40 00 00 10: 00 d0 af fe 00 00 90 fe 00 00 00 00 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 53 50 57 33 30: 00 00 00 00 40 00 00 00 00 00 00 00 09 01 04 18 ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: DMA Problems with Intel 845 Chipset and Northwood CPU 2002-08-07 23:35 ` DMA Problems with Intel 845 Chipset and Northwood CPU Mark Cuss @ 2002-08-08 0:58 ` John L. Korpi 2002-08-08 16:12 ` Mark Cuss 0 siblings, 1 reply; 47+ messages in thread From: John L. Korpi @ 2002-08-08 0:58 UTC (permalink / raw) To: mcuss, Linux-Kernel ML Had a similar problem with similar chipsets. 2.4.19-ac4 fixed the problem. Cheers jlk On Wednesday 07 August 2002 07:35 pm, Mark Cuss wrote: > Hello all, > > Please accept my apologies if this question has already been answered.... > > I have a new Pentium 4 computer and I can't get the DMA working for hard > disk transfers. Specifically, its a Dell Dimension 4500 with a Pentium 4 > 2.26 GHz processor (Northwood) and an 845 (Brookdale) chipset. RedHat > 7.3.... > > I noticed that the hard disk transfers were very slow. I tried to set up > DMA with hdparm - see below: > > [root@yoda ide]# hdparm /dev/hda > > /dev/hda: > multcount = 16 (on) > I/O support = 0 (default 16-bit) > unmaskirq = 0 (off) > using_dma = 0 (off) > keepsettings = 0 (off) > nowerr = 0 (off) > readonly = 0 (off) > readahead = 8 (on) > geometry = 4866/255/63, sectors = 78177792, start = 0 > busstate = 1 (on) > [root@yoda ide]# hdparm -d1 /dev/hda > > /dev/hda: > setting using_dma to 1 (on) > HDIO_SET_DMA failed: Operation not permitted > using_dma = 0 (off) > [root@yoda ide]# > > I thought that perhaps the chipset had changed between this Northwood > machine an the older core - I have a P4 1.8 GHz machine that DMA works fine > on. I upgraded to kernel 2.4.19 with no change. > > The IDE controller (according to Windows XP....) is an Intel 82801DB Ultra > ATA Storage Controller - 24CB. I've included the lspci --vvx listing from > the problem machine below - my aplogies for the long list. If anyone has > any suggestions I'd really appreciate them... > > Thanks.... > > Mark > > lspci listing: > > 00:00.0 Host bridge: Intel Corp. 
82845 845 (Brookdale) Chipset Host Bridge > (rev 11) > Subsystem: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- > <MAbort+ >SERR- <PERR- > Latency: 0 > Region 0: Memory at f0000000 (32-bit, prefetchable) [size=128M] > Capabilities: [e4] #09 [a104] > Capabilities: [a0] AGP version 2.0 > Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2 > Command: RQ=0 SBA- AGP+ 64bit- FW- Rate=<none> > 00: 86 80 30 1a 06 01 90 20 11 00 00 06 00 00 00 00 > 10: 08 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 30 1a > 30: 00 00 00 00 e4 00 00 00 00 00 00 00 00 00 00 00 > > 00:01.0 PCI bridge: Intel Corp. 82845 845 (Brookdale) Chipset AGP Bridge > (rev 11) (prog-if 00 [Normal decode]) > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap- 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 64 > Bus: primary=00, secondary=01, subordinate=01, sec-latency=64 > Memory behind bridge: fc700000-fe7fffff > Prefetchable memory behind bridge: dc300000-ec4fffff > BridgeCtl: Parity- SERR+ NoISA- VGA+ MAbort- >Reset- FastB2B- > 00: 86 80 31 1a 07 01 a0 00 11 00 04 06 00 40 01 00 > 10: 00 00 00 00 00 00 00 00 00 01 01 40 f0 00 a0 22 > 20: 70 fc 70 fe 30 dc 40 ec 00 00 00 00 00 00 00 00 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a 00 > > 00:1d.0 USB Controller: Intel Corp.: Unknown device 24c2 (rev 01) (prog-if > 00 [UHCI]) > Subsystem: Dell Computer Corporation: Unknown device 0132 > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR- FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 0 > Interrupt: pin A routed to IRQ 11 > Region 4: I/O ports at e800 [size=32] > 00: 86 80 c2 24 05 
00 80 02 01 00 03 0c 00 00 80 00 > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 01 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 0b 01 00 00 > > 00:1d.1 USB Controller: Intel Corp.: Unknown device 24c4 (rev 01) (prog-if > 00 [UHCI]) > Subsystem: Dell Computer Corporation: Unknown device 0132 > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR- FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 0 > Interrupt: pin B routed to IRQ 5 > Region 4: I/O ports at e880 [size=32] > 00: 86 80 c4 24 05 00 80 02 01 00 03 0c 00 00 00 00 > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 81 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 05 02 00 00 > > 00:1d.2 USB Controller: Intel Corp.: Unknown device 24c7 (rev 01) (prog-if > 00 [UHCI]) > Subsystem: Dell Computer Corporation: Unknown device 0132 > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR- FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 0 > Interrupt: pin C routed to IRQ 9 > Region 4: I/O ports at ec00 [size=32] > 00: 86 80 c7 24 05 00 80 02 01 00 03 0c 00 00 00 00 > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 01 ec 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 09 03 00 00 > > 00:1d.7 USB Controller: Intel Corp.: Unknown device 24cd (rev 01) (prog-if > 20 [EHCI]) > Subsystem: Dell Computer Corporation: Unknown device 0132 > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 0 > Interrupt: pin D routed to IRQ 10 > Region 0: Memory at febffc00 (32-bit, non-prefetchable) [size=1K] > Capabilities: [50] Power 
Management version 2 > Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA > PME(D0+,D1-,D2-,D3hot+,D3cold+) > Status: D0 PME-Enable- DSel=0 DScale=0 PME- > Capabilities: [58] #0a [2080] > 00: 86 80 cd 24 06 01 90 02 01 20 03 0c 00 00 00 00 > 10: 00 fc bf fe 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 00 00 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > 30: 00 00 00 00 50 00 00 00 00 00 00 00 0a 04 00 00 > > 00:1e.0 PCI bridge: Intel Corp. 82801BA/CA PCI Bridge (rev 81) (prog-if 00 > [Normal decode]) > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- > <MAbort- >SERR- <PERR+ > Latency: 0 > Bus: primary=00, secondary=02, subordinate=02, sec-latency=32 > I/O behind bridge: 0000d000-0000dfff > Memory behind bridge: fe800000-feafffff > Prefetchable memory behind bridge: ec500000-ec5fffff > BridgeCtl: Parity- SERR+ NoISA+ VGA- MAbort- >Reset- FastB2B- > 00: 86 80 4e 24 07 01 80 80 81 00 04 06 00 00 01 00 > 10: 00 00 00 00 00 00 00 00 00 02 02 20 d0 d0 80 22 > 20: 80 fe a0 fe 50 ec 50 ec 00 00 00 00 00 00 00 00 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 00 > > 00:1f.0 ISA bridge: Intel Corp.: Unknown device 24c0 (rev 01) > Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 0 > 00: 86 80 c0 24 0f 01 80 02 01 00 01 06 00 00 80 00 > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > 00:1f.1 IDE interface: Intel Corp.: Unknown device 24cb (rev 01) (prog-if > 8a [Master SecP PriP]) > Subsystem: Dell Computer Corporation: Unknown device 0132 > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR- FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium 
>TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 0 > Interrupt: pin A routed to IRQ 9 > Region 0: I/O ports at <unassigned> [size=8] > Region 1: I/O ports at <unassigned> [size=4] > Region 2: I/O ports at <unassigned> [size=8] > Region 3: I/O ports at <unassigned> [size=4] > Region 4: I/O ports at ffa0 [size=16] > Region 5: Memory at 20000000 (32-bit, non-prefetchable) [disabled] > [size=1K] > 00: 86 80 cb 24 05 00 80 02 01 8a 01 01 00 00 00 00 > 10: 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00 > 20: a1 ff 00 00 00 00 00 20 00 00 00 00 28 10 32 01 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 ff 01 00 00 > > 00:1f.3 SMBus: Intel Corp.: Unknown device 24c3 (rev 01) > Subsystem: Dell Computer Corporation: Unknown device 0132 > Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR- FastB2B- > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Interrupt: pin B routed to IRQ 3 > Region 4: I/O ports at e480 [size=32] > 00: 86 80 c3 24 01 00 80 02 01 00 05 0c 00 00 00 00 > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 20: 81 e4 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > 30: 00 00 00 00 00 00 00 00 00 00 00 00 03 02 00 00 > > 01:00.0 VGA compatible controller: nVidia Corporation NV25 [GeForce4 > Ti4200] (rev a3) (prog-if 00 [VGA]) > Subsystem: nVidia Corporation: Unknown device 0132 > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR- FastB2B- > Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 248 (1250ns min, 250ns max) > Interrupt: pin A routed to IRQ 11 > Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M] > Region 1: Memory at e0000000 (32-bit, prefetchable) [size=128M] > Region 2: Memory at ec480000 (32-bit, prefetchable) [size=512K] > Expansion ROM at fe7e0000 [disabled] [size=128K] > Capabilities: [60] Power Management version 2 > Flags: PMEClk- DSI- D1- D2- 
AuxCurrent=0mA > PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME- > Capabilities: [44] AGP version 2.0 > Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2 > Command: RQ=31 SBA- AGP+ 64bit- FW- Rate=<none> > 00: de 10 53 02 07 00 b0 02 a3 00 00 03 00 f8 00 00 > 10: 00 00 00 fd 08 00 00 e0 08 00 48 ec 00 00 00 00 > 20: 00 00 00 00 00 00 00 00 00 00 00 00 de 10 32 01 > 30: 00 00 00 00 60 00 00 00 00 00 00 00 0b 01 05 01 > > 02:00.0 Ethernet controller: Intel Corp. 82557/8/9 [Ethernet Pro 100] (rev > 10) > Subsystem: Intel Corp.: Unknown device 0071 > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 64 (2000ns min, 14000ns max), cache line size 08 > Interrupt: pin A routed to IRQ 11 > Region 0: Memory at feaff000 (32-bit, non-prefetchable) [size=4K] > Region 1: I/O ports at dc00 [size=64] > Region 2: Memory at feac0000 (32-bit, non-prefetchable) [size=128K] > Expansion ROM at feae0000 [disabled] [size=64K] > Capabilities: [dc] Power Management version 2 > Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA > PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=2 PME- > 00: 86 80 29 12 17 01 90 02 10 00 00 02 08 40 00 00 > 10: 00 f0 af fe 01 dc 00 00 00 00 ac fe 00 00 00 00 > 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 71 00 > 30: 00 00 ae fe dc 00 00 00 00 00 00 00 0b 01 08 38 > > 02:02.0 Multimedia audio controller: Cirrus Logic CS 4614/22/24 > [CrystalClear SoundFusion Audio Accelerator] (rev 01) > Subsystem: Voyetra Technologies: Unknown device 3357 > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > Stepping- SERR+ FastB2B- > Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort- > <MAbort- >SERR- <PERR- > Latency: 64 (1000ns min, 6000ns max) > Interrupt: pin A routed to IRQ 9 > Region 0: Memory at feafd000 (32-bit, non-prefetchable) [size=4K] > 
Region 1: Memory at fe900000 (32-bit, non-prefetchable) [size=1M] > Capabilities: [40] Power Management version 2 > Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA > PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME- > 00: 13 10 03 60 06 01 10 04 01 00 01 04 00 40 00 00 > 10: 00 d0 af fe 00 00 90 fe 00 00 00 00 00 00 00 00 > 20: 00 00 00 00 00 00 00 00 00 00 00 00 53 50 57 33 > 30: 00 00 00 00 40 00 00 00 00 00 00 00 09 01 04 18 > > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- John L. Korpi, Ph.D. -- Senior Scientist, Networks & Infrastructure NeuStar, Inc. Voice: 1.216.241.2919, 1.703.435.0682 Mobile: 1.216.233.3042 Pager: 1.800.398.2959 ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: DMA Problems with Intel 845 Chipset and Northwood CPU 2002-08-08 0:58 ` John L. Korpi @ 2002-08-08 16:12 ` Mark Cuss 0 siblings, 0 replies; 47+ messages in thread From: Mark Cuss @ 2002-08-08 16:12 UTC (permalink / raw) To: jkorpi, Linux-Kernel ML Thanks - that did the trick - the system runs much better with DMA on the hard disk :) Does anyone know if this patch will be included in the 2.4.20 release kernel? Thanks Mark ----- Original Message ----- From: "John L. Korpi" <jkorpi@mindspring.com> To: <mcuss@cdlsystems.com>; "Linux-Kernel ML" <linux-kernel@vger.kernel.org> Sent: Wednesday, August 07, 2002 6:58 PM Subject: Re: DMA Problems with Intel 845 Chipset and Northwood CPU > Had a similar problem with similar chipsets. 2.4.19-ac4 fixed the problem. > > Cheers > > jlk > > On Wednesday 07 August 2002 07:35 pm, Mark Cuss wrote: > > Hello all, > > > > Please accept my apologies if this question has already been answered.... > > > > I have a new Pentium 4 computer and I can't get the DMA working for hard > > disk transfers. Specifically, its a Dell Dimension 4500 with a Pentium 4 > > 2.26 GHz processor (Northwood) and an 845 (Brookdale) chipset. RedHat > > 7.3.... > > > > I noticed that the hard disk transfers were very slow. I tried to set up > > DMA with hdparm - see below: > > > > [root@yoda ide]# hdparm /dev/hda > > > > /dev/hda: > > multcount = 16 (on) > > I/O support = 0 (default 16-bit) > > unmaskirq = 0 (off) > > using_dma = 0 (off) > > keepsettings = 0 (off) > > nowerr = 0 (off) > > readonly = 0 (off) > > readahead = 8 (on) > > geometry = 4866/255/63, sectors = 78177792, start = 0 > > busstate = 1 (on) > > [root@yoda ide]# hdparm -d1 /dev/hda > > > > /dev/hda: > > setting using_dma to 1 (on) > > HDIO_SET_DMA failed: Operation not permitted > > using_dma = 0 (off) > > [root@yoda ide]# > > > > I thought that perhaps the chipset had changed between this Northwood > > machine an the older core - I have a P4 1.8 GHz machine that DMA works fine > > on. 
I upgraded to kernel 2.4.19 with no change. > > > > The IDE controller (according to Windows XP....) is an Intel 82801DB Ultra > > ATA Storage Controller - 24CB. I've included the lspci --vvx listing from > > the problem machine below - my aplogies for the long list. If anyone has > > any suggestions I'd really appreciate them... > > > > Thanks.... > > > > Mark > > > > lspci listing: > > > > 00:00.0 Host bridge: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge > > (rev 11) > > Subsystem: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge > > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- > > <MAbort+ >SERR- <PERR- > > Latency: 0 > > Region 0: Memory at f0000000 (32-bit, prefetchable) [size=128M] > > Capabilities: [e4] #09 [a104] > > Capabilities: [a0] AGP version 2.0 > > Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2 > > Command: RQ=0 SBA- AGP+ 64bit- FW- Rate=<none> > > 00: 86 80 30 1a 06 01 90 20 11 00 00 06 00 00 00 00 > > 10: 08 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00 > > 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 30 1a > > 30: 00 00 00 00 e4 00 00 00 00 00 00 00 00 00 00 00 > > > > 00:01.0 PCI bridge: Intel Corp. 
82845 845 (Brookdale) Chipset AGP Bridge > > (rev 11) (prog-if 00 [Normal decode]) > > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap- 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 64 > > Bus: primary=00, secondary=01, subordinate=01, sec-latency=64 > > Memory behind bridge: fc700000-fe7fffff > > Prefetchable memory behind bridge: dc300000-ec4fffff > > BridgeCtl: Parity- SERR+ NoISA- VGA+ MAbort- >Reset- FastB2B- > > 00: 86 80 31 1a 07 01 a0 00 11 00 04 06 00 40 01 00 > > 10: 00 00 00 00 00 00 00 00 00 01 01 40 f0 00 a0 22 > > 20: 70 fc 70 fe 30 dc 40 ec 00 00 00 00 00 00 00 00 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a 00 > > > > 00:1d.0 USB Controller: Intel Corp.: Unknown device 24c2 (rev 01) (prog-if > > 00 [UHCI]) > > Subsystem: Dell Computer Corporation: Unknown device 0132 > > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR- FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 0 > > Interrupt: pin A routed to IRQ 11 > > Region 4: I/O ports at e800 [size=32] > > 00: 86 80 c2 24 05 00 80 02 01 00 03 0c 00 00 80 00 > > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > 20: 01 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 0b 01 00 00 > > > > 00:1d.1 USB Controller: Intel Corp.: Unknown device 24c4 (rev 01) (prog-if > > 00 [UHCI]) > > Subsystem: Dell Computer Corporation: Unknown device 0132 > > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR- FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 0 > > Interrupt: pin B routed to IRQ 5 > > Region 4: I/O ports at e880 [size=32] > > 00: 86 80 c4 24 05 00 80 02 01 00 03 0c 00 00 00 00 > > 10: 00 00 00 00 00 00 00 00 00 00 
00 00 00 00 00 00 > > 20: 81 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 05 02 00 00 > > > > 00:1d.2 USB Controller: Intel Corp.: Unknown device 24c7 (rev 01) (prog-if > > 00 [UHCI]) > > Subsystem: Dell Computer Corporation: Unknown device 0132 > > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR- FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 0 > > Interrupt: pin C routed to IRQ 9 > > Region 4: I/O ports at ec00 [size=32] > > 00: 86 80 c7 24 05 00 80 02 01 00 03 0c 00 00 00 00 > > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > 20: 01 ec 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 09 03 00 00 > > > > 00:1d.7 USB Controller: Intel Corp.: Unknown device 24cd (rev 01) (prog-if > > 20 [EHCI]) > > Subsystem: Dell Computer Corporation: Unknown device 0132 > > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 0 > > Interrupt: pin D routed to IRQ 10 > > Region 0: Memory at febffc00 (32-bit, non-prefetchable) [size=1K] > > Capabilities: [50] Power Management version 2 > > Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA > > PME(D0+,D1-,D2-,D3hot+,D3cold+) > > Status: D0 PME-Enable- DSel=0 DScale=0 PME- > > Capabilities: [58] #0a [2080] > > 00: 86 80 cd 24 06 01 90 02 01 20 03 0c 00 00 00 00 > > 10: 00 fc bf fe 00 00 00 00 00 00 00 00 00 00 00 00 > > 20: 00 00 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > > 30: 00 00 00 00 50 00 00 00 00 00 00 00 0a 04 00 00 > > > > 00:1e.0 PCI bridge: Intel Corp. 
82801BA/CA PCI Bridge (rev 81) (prog-if 00 > > [Normal decode]) > > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort- > > <MAbort- >SERR- <PERR+ > > Latency: 0 > > Bus: primary=00, secondary=02, subordinate=02, sec-latency=32 > > I/O behind bridge: 0000d000-0000dfff > > Memory behind bridge: fe800000-feafffff > > Prefetchable memory behind bridge: ec500000-ec5fffff > > BridgeCtl: Parity- SERR+ NoISA+ VGA- MAbort- >Reset- FastB2B- > > 00: 86 80 4e 24 07 01 80 80 81 00 04 06 00 00 01 00 > > 10: 00 00 00 00 00 00 00 00 00 02 02 20 d0 d0 80 22 > > 20: 80 fe a0 fe 50 ec 50 ec 00 00 00 00 00 00 00 00 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 00 > > > > 00:1f.0 ISA bridge: Intel Corp.: Unknown device 24c0 (rev 01) > > Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 0 > > 00: 86 80 c0 24 0f 01 80 02 01 00 01 06 00 00 80 00 > > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > 20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > > > 00:1f.1 IDE interface: Intel Corp.: Unknown device 24cb (rev 01) (prog-if > > 8a [Master SecP PriP]) > > Subsystem: Dell Computer Corporation: Unknown device 0132 > > Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR- FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 0 > > Interrupt: pin A routed to IRQ 9 > > Region 0: I/O ports at <unassigned> [size=8] > > Region 1: I/O ports at <unassigned> [size=4] > > Region 2: I/O ports at <unassigned> [size=8] > > Region 3: I/O ports at <unassigned> [size=4] > > Region 4: I/O ports at ffa0 [size=16] > > Region 5: Memory at 20000000 
(32-bit, non-prefetchable) [disabled] > > [size=1K] > > 00: 86 80 cb 24 05 00 80 02 01 8a 01 01 00 00 00 00 > > 10: 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00 > > 20: a1 ff 00 00 00 00 00 20 00 00 00 00 28 10 32 01 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 ff 01 00 00 > > > > 00:1f.3 SMBus: Intel Corp.: Unknown device 24c3 (rev 01) > > Subsystem: Dell Computer Corporation: Unknown device 0132 > > Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR- FastB2B- > > Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Interrupt: pin B routed to IRQ 3 > > Region 4: I/O ports at e480 [size=32] > > 00: 86 80 c3 24 01 00 80 02 01 00 05 0c 00 00 00 00 > > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > 20: 81 e4 00 00 00 00 00 00 00 00 00 00 28 10 32 01 > > 30: 00 00 00 00 00 00 00 00 00 00 00 00 03 02 00 00 > > > > 01:00.0 VGA compatible controller: nVidia Corporation NV25 [GeForce4 > > Ti4200] (rev a3) (prog-if 00 [VGA]) > > Subsystem: nVidia Corporation: Unknown device 0132 > > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR- FastB2B- > > Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 248 (1250ns min, 250ns max) > > Interrupt: pin A routed to IRQ 11 > > Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M] > > Region 1: Memory at e0000000 (32-bit, prefetchable) [size=128M] > > Region 2: Memory at ec480000 (32-bit, prefetchable) [size=512K] > > Expansion ROM at fe7e0000 [disabled] [size=128K] > > Capabilities: [60] Power Management version 2 > > Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA > > PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME- > > Capabilities: [44] AGP version 2.0 > > Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2 > > Command: RQ=31 SBA- AGP+ 64bit- FW- Rate=<none> > > 00: de 10 53 02 07 00 b0 02 a3 00 00 03 00 f8 00 00 > 
> 10: 00 00 00 fd 08 00 00 e0 08 00 48 ec 00 00 00 00 > > 20: 00 00 00 00 00 00 00 00 00 00 00 00 de 10 32 01 > > 30: 00 00 00 00 60 00 00 00 00 00 00 00 0b 01 05 01 > > > > 02:00.0 Ethernet controller: Intel Corp. 82557/8/9 [Ethernet Pro 100] (rev > > 10) > > Subsystem: Intel Corp.: Unknown device 0071 > > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 64 (2000ns min, 14000ns max), cache line size 08 > > Interrupt: pin A routed to IRQ 11 > > Region 0: Memory at feaff000 (32-bit, non-prefetchable) [size=4K] > > Region 1: I/O ports at dc00 [size=64] > > Region 2: Memory at feac0000 (32-bit, non-prefetchable) [size=128K] > > Expansion ROM at feae0000 [disabled] [size=64K] > > Capabilities: [dc] Power Management version 2 > > Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA > > PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=2 PME- > > 00: 86 80 29 12 17 01 90 02 10 00 00 02 08 40 00 00 > > 10: 00 f0 af fe 01 dc 00 00 00 00 ac fe 00 00 00 00 > > 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 71 00 > > 30: 00 00 ae fe dc 00 00 00 00 00 00 00 0b 01 08 38 > > > > 02:02.0 Multimedia audio controller: Cirrus Logic CS 4614/22/24 > > [CrystalClear SoundFusion Audio Accelerator] (rev 01) > > Subsystem: Voyetra Technologies: Unknown device 3357 > > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- > > Stepping- SERR+ FastB2B- > > Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort- > > <MAbort- >SERR- <PERR- > > Latency: 64 (1000ns min, 6000ns max) > > Interrupt: pin A routed to IRQ 9 > > Region 0: Memory at feafd000 (32-bit, non-prefetchable) [size=4K] > > Region 1: Memory at fe900000 (32-bit, non-prefetchable) [size=1M] > > Capabilities: [40] Power Management version 2 > > Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA > > PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: 
D0 PME-Enable- DSel=0 DScale=0 PME- > > 00: 13 10 03 60 06 01 10 04 01 00 01 04 00 40 00 00 > > 10: 00 d0 af fe 00 00 90 fe 00 00 00 00 00 00 00 00 > > 20: 00 00 00 00 00 00 00 00 00 00 00 00 53 50 57 33 > > 30: 00 00 00 00 40 00 00 00 00 00 00 00 09 01 04 18 > > > > > > - > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ > > -- > John L. Korpi, Ph.D. -- Senior Scientist, Networks & Infrastructure > NeuStar, Inc. > Voice: 1.216.241.2919, 1.703.435.0682 > Mobile: 1.216.233.3042 Pager: 1.800.398.2959 > > ^ permalink raw reply [flat|nested] 47+ messages in thread
* [patch] tls-2.5.31-C3 2002-08-07 18:33 ` Linus Torvalds ` (3 preceding siblings ...) 2002-08-07 22:36 ` Luca Barbieri @ 2002-08-11 21:46 ` Ingo Molnar 2002-08-12 7:34 ` Stephen Rothwell ` (2 more replies) 4 siblings, 3 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-11 21:46 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri [-- Attachment #1: Type: TEXT/PLAIN, Size: 24215 bytes --] the attached patch cleans up the TLS code and it introduces a number of new capabilities as well: - move the TLS space to the first 12 GDT descriptors - kernel descriptors come afterwards. - make USER CS and DS just another TLS entry, which happen to have a default value that matches the current segments. It's done in a way that does not result in extra context-switch overhead. - make segment 0040 available to Wine, allow the setting of 16-bit segments. Allow full flexibility of all the safe segment variants. - sys_set_thread_area(&info) can be used for a specific GDT entry, but it can also trigger an 'allocation' of a yet unused TLS entry, by using an ->entry_number of -1. It's recommended for userspace code to use the -1 value, to make sure different libraries can nest properly. - sys_get_thread_area(&info) can be used to read TLS entries into the same userspace descriptor format as sys_set_thread_area() does. The new syscalls are now actually relatively clean, and the TLS area can be extended seamlessly. - move KERNEL CS, DS, TSS and LDT to the same cacheline. - clean up all the kernel descriptors to be more or less easily modified/reordered from segment.h only, with minimal dependencies. - move the GDT/TLS definitions to asm-i386/segment.h, to make it easier to include the constants into assembly code and lowlevel include files. an open issue: the context-switch code uses an optimized variant of TLS loading - only the truly affected portions of the GDT get rewritten. 
But i'm not 100% convinced this is the right way - i kept the TLS in the same format as the GDT, so we could as well just write 96 bytes unconditionally. That's smaller than a single cacheline on modern CPUs. Doing this would greatly simplify the code. I've mainly done this current optimization to show that it can be done in a relatively straightforward way, but i don't think it's worth it. Especially since the TLS area is 3 32-byte cachelines, it should easily trigger all the memcpy fastpaths in various CPUs. So i'd suggest to keep the tls_bytes variables only, and thus non-TLS code would see only a single branch in the context-switch path. another issue: i've not gone the whole way of unifying LDT and TLS support - we've already got compatibility code in the LDT interfaces and changing LDTs via the TLS syscalls would only make the situation even more messy. Nevertheless there are some new synergies between the LDT and TLS code, which resulted in some ldt.c code reduction. i've attached a new version of tls.c that tests the new TLS syscall variants and shows off some of the new capabilities. TLS support works just fine on 2.5.31 + this patch, on SMP and UP as well. Comments? 
Ingo --- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002 +++ linux/drivers/pnp/pnpbios_core.c Sun Aug 11 23:28:44 2002 @@ -90,7 +90,8 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) +#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) + #define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ #define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ #define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ --- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/cpu/common.c Sun Aug 11 23:28:44 2002 @@ -423,6 +423,7 @@ { int cpu = smp_processor_id(); struct tss_struct * t = init_tss + cpu; + struct thread_struct *thread = ¤t->thread; if (test_and_set_bit(cpu, &cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -447,9 +448,14 @@ */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; } + /* + * Set up the per-thread TLS descriptor cache: + */ + memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8); + clear_TLS(thread); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); @@ -468,9 +474,9 @@ BUG(); enter_lazy_tlb(&init_mm, current, cpu); - t->esp0 = current->thread.esp0; + t->esp0 = thread->esp0; set_tss_desc(cpu,t); - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); load_LDT(&init_mm.context); --- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002 +++ linux/arch/i386/kernel/entry.S Sun Aug 11 23:28:44 2002 @@ -753,6 +753,7 @@ .long sys_sched_setaffinity .long sys_sched_getaffinity .long sys_set_thread_area + .long sys_get_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- 
linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/head.S Sun Aug 11 23:28:44 2002 @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -412,34 +407,44 @@ ALIGN /* - * The Global Descriptor Table contains 20 quadwords, per-CPU. + * The Global Descriptor Table contains 28 quadwords, per-CPU. */ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x00cffa000000ffff /* 0x0b user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x13 user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x1b TLS entry 3 */ + .quad 0x0000000000000000 /* ... */ + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 /* ... */ + .quad 0x0000000000000000 /* 0x5b TLS entry 11 */ + + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + /* * The APM segments have byte granularity and their bases * and limits are set at run time. 
*/ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ - .quad 0x00409a0000000000 /* 0x48 APM CS code */ - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0x58 APM DS data */ + .quad 0x0040920000000000 /* 0x80 APM set up for bad BIOS's */ + .quad 0x00409a0000000000 /* 0x88 APM CS code */ + .quad 0x00009a0000000000 /* 0x90 APM CS 16 code (16 bit) */ + .quad 0x0040920000000000 /* 0x98 APM DS data */ /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00c09a0000000000 /* 0xa0 32-bit code */ + .quad 0x00809a0000000000 /* 0xa8 16-bit code */ + .quad 0x0080920000000000 /* 0xb0 16-bit data */ + .quad 0x0080920000000000 /* 0xb8 16-bit data */ + .quad 0x0080920000000000 /* 0xc0 16-bit data */ + .quad 0x0000000000000000 /* 0xc8 not used */ + .quad 0x0000000000000000 /* 0xd0 not used */ + .quad 0x0000000000000000 /* 0xd8 not used */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ --- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002 +++ linux/arch/i386/kernel/process.c Sun Aug 11 23:28:44 2002 @@ -681,11 +681,9 @@ /* * Load the per-thread Thread-Local Storage descriptor. - * - * NOTE: it's faster to do the two stores unconditionally - * than to branch away. */ - load_TLS_desc(next, cpu); + if (prev->nr_tls_bytes || next->nr_tls_bytes) + load_TLS(prev, next, cpu); /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -834,35 +832,168 @@ #undef first_sched /* - * Set the Thread-Local Storage area: + * sys_alloc_thread_area: get a yet unused TLS descriptor index. 
*/ -asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags) +static int get_free_idx(void) { struct thread_struct *t = ¤t->thread; - int writable = 0; - int cpu; + int idx; - /* do not allow unused flags */ - if (flags & ~TLS_FLAGS_MASK) + for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++) + if (desc_empty(t->tls_array + idx)) + return idx; + return -ESRCH; +} + +static inline int first_tls(struct desc_struct *array) +{ + struct desc_struct *default_array = init_task.thread.tls_array; + int idx; + + for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++) + if (!desc_equal(array + idx, default_array + idx)) + return idx; + + return 0; +} + +static inline int last_tls(struct desc_struct *array) +{ + struct desc_struct *default_array = init_task.thread.tls_array; + int idx; + + for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--) + if (!desc_equal(array + idx, default_array + idx)) + return idx; + + return 0; +} + +#define CHECK_TLS_IDX(idx) \ +do { \ + if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX) \ + BUG(); \ +} while (0) + +/* + * Set a given TLS descriptor: + */ +asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct thread_struct *t = ¤t->thread; + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - if (flags & TLS_FLAG_WRITABLE) - writable = 1; + desc = t->tls_array + idx; /* * We must not get preempted while modifying the TLS. 
*/ cpu = get_cpu(); - t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff; + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + + t->first_tls_byte = first_tls(t->tls_array) * 8; + t->last_tls_byte = (last_tls(t->tls_array) + 1) * 8; - t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | - 0xf0000 | (writable << 9) | (1 << 15) | - (1 << 22) | (1 << 23) | 0x7000; + if (t->first_tls_byte || t->last_tls_byte) { + CHECK_TLS_IDX(t->first_tls_byte/8); + CHECK_TLS_IDX(t->last_tls_byte/8-1); + t->nr_tls_bytes = t->last_tls_byte - t->first_tls_byte; + if (t->nr_tls_bytes < 0) + BUG(); + if (t->nr_tls_bytes > GDT_ENTRY_TLS_ENTRIES * 8) + BUG(); + } else { + /* + * If a thread has no TLS then invert the first/last + * range so that if we switch from (or to) a TLS-using + * thread then it will be the thread's TLS area that + * will be copied into the GDT. + */ + t->nr_tls_bytes = 0; + t->first_tls_byte = 0; + t->last_tls_byte = (GDT_ENTRY_TLS_MAX + 1) * 8; + } + + load_TLS(t, t, cpu); - load_TLS_desc(t, cpu); put_cpu(); - return TLS_ENTRY*8 + 3; + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 23) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || 
idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; } --- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/suspend.c Sun Aug 11 23:28:44 2002 @@ -207,7 +207,7 @@ struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(¤t->mm->context); /* This does lldt */ --- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002 +++ linux/arch/i386/kernel/ldt.c Sun Aug 11 23:28:44 2002 @@ -200,32 +200,17 @@ /* Allow LDTs to be cleared by the user. 
*/ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { + if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; goto install; } } - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); /* Install the new entry ... */ install: --- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002 +++ linux/arch/i386/boot/setup.S Sun Aug 11 23:28:44 2002 @@ -1005,9 +1005,14 @@ ret # Descriptor tables +# +# NOTE: if you think the GDT is large, you can make it smaller by just +# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt +# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into +# the GDT, but those wont be used so it's not a problem. 
+# gdt: - .word 0, 0, 0, 0 # dummy - .word 0, 0, 0, 0 # unused + .fill GDT_ENTRY_KERNEL_CS,8,0 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) .word 0 # base address = 0 --- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002 +++ linux/include/linux/apm_bios.h Sun Aug 11 23:28:44 2002 @@ -21,8 +21,8 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8) +#define APM_CS (APM_BASE + 8) #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) --- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/desc.h Sun Aug 11 23:28:44 2002 @@ -2,50 +2,12 @@ #define __ARCH_DESC_H #include <asm/ldt.h> - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - Thread-Local Storage (TLS) segment - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - TSS - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline - * 9 - APM BIOS support - * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - not used - * 18 - not used - * 19 - not used - */ -#define TLS_ENTRY 1 -#define TSS_ENTRY 6 -#define LDT_ENTRY 7 -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. - * - * We pad the GDT to cacheline boundary. 
- */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 20 +#include <asm/segment.h> #ifndef __ASSEMBLY__ #include <asm/mmu.h> -#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) - extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { @@ -55,8 +17,8 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) -#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) /* * This is the ldt that every process will get unless we need @@ -78,21 +40,52 @@ static inline void set_tss_desc(unsigned int cpu, void *addr) { - _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89); } static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { - _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } -#define TLS_FLAGS_MASK 0x00000001 +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -#define TLS_FLAG_WRITABLE 0x00000001 +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ 
+ (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) -static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +static inline void clear_TLS(struct thread_struct *t) { - cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; + t->nr_tls_bytes = 0; + t->first_tls_byte = 0; + t->last_tls_byte = (GDT_ENTRY_TLS_MAX + 1) * 8; +} + +static inline void load_TLS(struct thread_struct *prev, struct thread_struct *next, unsigned int cpu) +{ + int first_byte = min(prev->first_tls_byte, next->first_tls_byte); + int last_byte = max(prev->last_tls_byte, next->last_tls_byte); + + memcpy((char *)(cpu_gdt_table[cpu]) + first_byte, (char *)next->tls_array + first_byte, last_byte - first_byte); } static inline void clear_LDT(void) --- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/processor.h Sun Aug 11 23:28:44 2002 @@ -22,6 +22,11 @@ unsigned long a,b; }; +#define desc_empty(desc) \ + (!((desc)->a + (desc)->b)) + +#define desc_equal(desc1, desc2) \ + (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -376,8 +381,16 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; -/* TLS cached descriptor */ - struct desc_struct tls_desc; + + /* + * cached TLS descriptors. + * + * The offset calculation is needed to not copy the whole TLS + * into the local GDT all the time. + * We count offsets in bytes to reduce context-switch overhead. 
+ */ + int nr_tls_bytes, first_tls_byte, last_tls_byte; + struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1]; }; #define INIT_THREAD { \ @@ -401,7 +414,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - LDT_ENTRY,0, /* ldt */ \ + GDT_ENTRY_LDT,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002 +++ linux/include/asm-i386/segment.h Sun Aug 11 23:28:44 2002 @@ -1,10 +1,84 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 1 - TLS segment #1 [ default user CS ] + * 2 - TLS segment #2 [ default user DS ] + * 3 - TLS segment #3 [ glibc's TLS segment ] + * 4 - TLS segment #4 [ Wine's %fs Win32 segment ] + * 5 - TLS segment #5 + * 6 - TLS segment #6 + * 7 - TLS segment #7 + * 8 - TLS segment #8 [ segment 0040 used by Wine ] + * 9 - TLS segment #9 + * 10 - TLS segment #9 + * 11 - TLS segment #9 + * + * ------- start of kernel segments, on a full cacheline: + * + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * + * ------- these are the less performance-sensitive segments: + * + * 16 - APM BIOS support + * 17 - APM BIOS support + * 18 - APM BIOS support + * 19 - APM BIOS support + * 20 - PNPBIOS support (16->32 gate) + * 21 - PNPBIOS support + * 22 - PNPBIOS support + * 23 - PNPBIOS support + * 24 - PNPBIOS support + * 25 - reserved + * 26 - reserved + * 27 - reserved + */ +#define GDT_ENTRY_TLS_ENTRIES 11 +#define GDT_ENTRY_TLS_MIN 1 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define GDT_ENTRY_DEFAULT_USER_CS (GDT_ENTRY_TLS_MIN + 0) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define 
GDT_ENTRY_DEFAULT_USER_DS (GDT_ENTRY_TLS_MIN + 1) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + + +#define GDT_ENTRY_KERNEL_BASE 12 + +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3) + +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4) +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8) + +/* + * The GDT has 25 entries but we pad it to cacheline boundary: + */ +#define GDT_ENTRIES 28 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. + */ +#define IDT_ENTRIES 256 #endif --- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/unistd.h Sun Aug 11 23:28:44 2002 @@ -248,6 +248,7 @@ #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ [-- Attachment #2: Type: TEXT/PLAIN, Size: 5479 bytes --] #include <asm/ldt.h> #include <stdio.h> #include <linux/unistd.h> #include <signal.h> #include <unistd.h> #include <stdlib.h> #include <pthread.h> #include <asm/sigcontext.h> #include <linux/unistd.h> /* * TLS functionality testing utility. 
*/ #define __NR_set_thread_area 243 _syscall1(int, set_thread_area, struct modify_ldt_ldt_s *, info) #define __NR_get_thread_area 244 _syscall1(int, get_thread_area, struct modify_ldt_ldt_s *, info) static inline void initseg (int seg) { asm ("mov %w0,%%fs" : : "r" (seg)); } static inline unsigned char __readseg (unsigned offset) { unsigned char res; asm ("fs; movb (%1),%%al" : "=a" (res) : "r" (offset)); return res; } static inline void __writeseg (unsigned offset, unsigned char b) { asm ("fs; movb %b1,(%0)" : : "r" (offset), "r" (b)); } static void readseg (void *dst, const void *src) { *(char *)dst = __readseg((unsigned int)src); } static void writeseg (void *dst, unsigned char value) { __writeseg((unsigned int)dst, value); } unsigned char pre_data [4096] = { [ 0 ... 4095 ] = 33 }; unsigned char data [4096] = { [ 0 ... 4095 ] = 44 }; unsigned char post_data [4096] = { [ 0 ... 4095 ] = 55 }; static void print_info (struct modify_ldt_ldt_s *info) { printf("info %p:\n", info); #define P(f) printf("..."#f": %d.\n", info->##f) P(entry_number); P(base_addr); P(limit); P(seg_32bit); P(contents); P(read_exec_only); P(limit_in_pages); P(seg_not_present); P(useable); } int main (void) { int i, idx, seg, ret; unsigned int base; unsigned char result; struct modify_ldt_ldt_s info, info2; memset(&info, 0, sizeof(info)); memset(&info2, 0, sizeof(info2)); info.entry_number = -1; info.base_addr = 0; info.limit = 0xfffff; info.seg_32bit = 1; info.contents = MODIFY_LDT_CONTENTS_DATA; info.read_exec_only = 0; info.limit_in_pages = 1; info.seg_not_present = 0; data[0] = 123; data[4096] = 210; base = 0; info.base_addr = base; printf("\ndoing set_thread_area(%08x):\n", base); ret = set_thread_area(&info); if (ret < 0) { printf("ret: %d, TEST FAILED!\n", ret); exit(1); } idx = info.entry_number; seg = idx * 8 + 3; printf("got idx: %d (sel: %02x)\n", idx, seg); initseg(seg); printf("\nreading %p byte of [0x%08x] TLS:\n", &data, base); readseg (&result, &data); if (result == 123) 
printf("====> %d --- TEST PASSED.\n\n", result); else printf("====> %d --- TEST FAILURE!\n\n", result); info.entry_number = -1; base = (unsigned int)&data; info.base_addr = base; printf("\ndoing set_thread_area(%08x):\n", base); ret = set_thread_area(&info); if (ret < 0) { printf("ret: %d, TEST FAILED!\n", ret); exit(1); } idx = info.entry_number; seg = idx * 8 + 3; printf("got idx: %d (sel: %02x)\n", idx, seg); initseg(seg); printf("\nreading %p byte of [0x%08x] TLS:\n", &data, base); readseg (&result, 0); if (result == 123) printf("====> %d --- TEST PASSED.\n\n", result); else printf("====> %d --- TEST FAILURE!\n\n", result); printf("\nreading TLS idx %d's descriptor.\n", idx); info2.entry_number = idx; ret = get_thread_area(&info2); if (ret < 0) { printf("ret: %d, TEST FAILED!\n", ret); exit(1); } if (memcmp(&info, &info2, sizeof(info))) { printf("huh, info != info2? (%d)\n", memcmp(&info, &info2, sizeof(info))); print_info(&info); print_info(&info2); } else printf("info == info2 - TEST PASSED.\n"); printf("\nclearing TLS idx %d's descriptor.\n", idx); info.entry_number = idx; info.base_addr = 0; info.limit = 0; info.seg_32bit = 0; info.contents = 0; info.read_exec_only = 1; info.limit_in_pages = 0; info.seg_not_present = 1; ret = set_thread_area(&info); if (ret < 0) { printf("ret: %d, TEST FAILED!\n", ret); exit(1); } printf("TEST PASSED.\n"); base = (unsigned int) &data; printf("\nre-allocating TLS idx %d's descriptor.\n", idx); for (i = 0; i < 2; i++) { info.entry_number = -1; info.base_addr = base; info.limit = 0xfffff; info.seg_32bit = 1; info.contents = MODIFY_LDT_CONTENTS_DATA; info.read_exec_only = 0; info.limit_in_pages = 1; info.seg_not_present = 0; ret = set_thread_area(&info); if (ret < 0) { printf("ret: %d, TEST FAILED!\n", ret); exit(1); } if (!i && (idx != info.entry_number)) { printf("idx %d != entry_number %d! 
TEST FAILED!\n", idx, info.entry_number); exit(1); } idx = info.entry_number; seg = idx * 8 + 3; printf("got idx: %d (sel: %02x)\n", idx, seg); sleep(1); initseg(seg); } printf("TEST PASSED.\n\n"); printf("writing last byte of 4097 byte [0x%08x] TLS:\n", base); writeseg ((void *)4096, 234); readseg (&result, (void *)4096); if (result == 234) printf("====> %d --- TEST PASSED.\n", result); else printf("====> %d --- TEST FAILURE!.\n", result); printf("writing read-only segment [0x%08x] (should coredump):\n", base); info.entry_number = -1; info.read_exec_only = 1; base = (unsigned int)&data; info.base_addr = base; ret = set_thread_area(&info); if (ret < 0) { printf("ret: %d, TEST FAILED!\n", ret); exit(1); } idx = info.entry_number; seg = idx * 8 + 3; printf("got idx: %d (sel: %02x)\n", idx, seg); initseg(seg); writeseg ((void *)4096, 234); printf("====> %d --- TEST FAILURE!.\n", result); return 0; } ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-11 21:46 ` [patch] tls-2.5.31-C3 Ingo Molnar @ 2002-08-12 7:34 ` Stephen Rothwell 2002-08-12 10:07 ` Ingo Molnar 2002-08-12 12:18 ` Luca Barbieri 2002-08-12 15:53 ` [patch] tls-2.5.31-D3 Ingo Molnar 2 siblings, 1 reply; 47+ messages in thread From: Stephen Rothwell @ 2002-08-12 7:34 UTC (permalink / raw) To: Ingo Molnar; +Cc: torvalds, linux-kernel, julliard, ldb Hi Ingo, On Sun, 11 Aug 2002 23:46:01 +0200 (CEST) Ingo Molnar <mingo@elte.hu> wrote: > > /* > * The APM segments have byte granularity and their bases > * and limits are set at run time. > */ > - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ > - .quad 0x00409a0000000000 /* 0x48 APM CS code */ > - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ > - .quad 0x0040920000000000 /* 0x58 APM DS data */ > + .quad 0x0040920000000000 /* 0x80 APM set up for bad BIOS's */ > + .quad 0x00409a0000000000 /* 0x88 APM CS code */ > + .quad 0x00009a0000000000 /* 0x90 APM CS 16 code (16 bit) */ > + .quad 0x0040920000000000 /* 0x98 APM DS data */ I just lost 0x40 which needs to be exactly 0x40 if it is to do its job (i.e. cope with brain dead BIOS writers using 0x40 as a segment offset in protected mode) ... The idea is that segment 0x40 maps from physical address 0x400 to the end of the first physical page. As a real mode program would (more or less) expect it to. The other three segments don't matter as long as they are in that order and contiguous. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 7:34 ` Stephen Rothwell @ 2002-08-12 10:07 ` Ingo Molnar 2002-08-12 8:23 ` Stephen Rothwell 2002-08-12 14:46 ` Stephen Rothwell 0 siblings, 2 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 10:07 UTC (permalink / raw) To: Stephen Rothwell; +Cc: torvalds, linux-kernel, julliard, ldb On Mon, 12 Aug 2002, Stephen Rothwell wrote: > > - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ > > - .quad 0x00409a0000000000 /* 0x48 APM CS code */ > > - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ > > - .quad 0x0040920000000000 /* 0x58 APM DS data */ > > + .quad 0x0040920000000000 /* 0x80 APM set up for bad BIOS's */ > > + .quad 0x00409a0000000000 /* 0x88 APM CS code */ > > + .quad 0x00009a0000000000 /* 0x90 APM CS 16 code (16 bit) */ > > + .quad 0x0040920000000000 /* 0x98 APM DS data */ > > I just lost 0x40 which needs to be exactly 0x40 if it is do its job > (i.e. cope with brain dead BIOS writers using 0x40 as a segment offset > in protected mode ... you can save/restore 0x40 in kernel-space if you need to no problem. > The idea is that segment 0x40 maps from physical address 0x400 to the > end of the first physical page. As a real mode program would (more or > less) expect it to. so you are using the kernel's GDT in real mode as well? Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:07 ` Ingo Molnar @ 2002-08-12 8:23 ` Stephen Rothwell 2002-08-12 10:08 ` Alan Cox 2002-08-12 14:46 ` Stephen Rothwell 1 sibling, 1 reply; 47+ messages in thread From: Stephen Rothwell @ 2002-08-12 8:23 UTC (permalink / raw) To: Ingo Molnar; +Cc: torvalds, linux-kernel, julliard, ldb On Mon, 12 Aug 2002 12:07:19 +0200 (CEST) Ingo Molnar <mingo@elte.hu> wrote: > > you can save/restore 0x40 in kernel-space if you need to no problem. I guess I could around every BIOS call ... Also, Alan (Cox) will say that's OK until he does APM on SMP on broken BIOS's :-) We could also just say that we no longer support those broken BIOS's ... > so you are using the kernel's GDT in real mode as well? No. The problem is that there are some BIOS's that contain code that (even though they are called in protected mode) load 0x40 into ds and expect to be able to reference stuff ... Causes really interesting OOPSs :-( -- Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 8:23 ` Stephen Rothwell @ 2002-08-12 10:08 ` Alan Cox 2002-08-12 10:49 ` Ingo Molnar ` (2 more replies) 0 siblings, 3 replies; 47+ messages in thread From: Alan Cox @ 2002-08-12 10:08 UTC (permalink / raw) To: Stephen Rothwell; +Cc: Ingo Molnar, Linus Torvalds, linux-kernel, julliard, ldb On Mon, 2002-08-12 at 09:23, Stephen Rothwell wrote: > > you can save/restore 0x40 in kernel-space if you need to no problem. > I guess I could around every BIOS call ... > > Also, Alan (Cox) will say that's OK until he does APM on SMP on broken > BIOS's :-) SMP actually makes no difference. I have full SMP APM working on my test boxes now. However pre-empt and SMP are the same problem space > We could also just say that we no longer support those broken BIOS's ... > > > so you are using the kernel's GDT in real mode as well? Yes. APM calls are made by all sorts of processes. > No. The problem is that there are some BIOS's that contain code that (even > though they are called in protected mode) load 0x40 into ds and expect to > be able to reference stuff ... Causes really interesting OOPSs :-( Which does mean you can steal the old TLS value and put it back across the calls just by changing the TLS data for that process. For that matter on Windows emulation I thought Windows also needed 0x40 to be the same offset as the BIOS does so can't we leave it hardwired ? ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:08 ` Alan Cox @ 2002-08-12 10:49 ` Ingo Molnar 2002-08-12 10:34 ` Alan Cox 2002-08-12 10:35 ` Alan Cox 2002-08-12 13:10 ` Kasper Dupont 2002-08-12 15:20 ` Ingo Molnar 2 siblings, 2 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 10:49 UTC (permalink / raw) To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On 12 Aug 2002, Alan Cox wrote: > > No. The problem is that there are some BIOS's that contain code that (even > > though they are called in protected mode) load 0x40 into ds and expect to > > be able to reference stuff ... Causes really interesting OOPSs :-( > > Which does mean you can steal the old TLS value and put it back across > the calls just by changing the TLS data for that process. For that > matter on Windows emulation I thought Windows also needed 0x40 to be the > same offset as the BIOS does so can't we leave it hardwired ? i have no problem with hardwiring it (and excluding it from the TLS allocation/setting syscalls) - in fact i almost did it that way. The question is, is the required descriptor format 100% the same for all APM variants, Wine and Windows and DOS emulators? It would suck if we had a bad descriptor and also removed the ability of Wine to trap 0x40 access. but, couldnt APM use its own private GDT for real-mode calls, with 0x40 filled in properly? That would pretty much decouple things. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:49 ` Ingo Molnar @ 2002-08-12 10:34 ` Alan Cox 2002-08-12 12:17 ` Ingo Molnar 2002-08-12 10:35 ` Alan Cox 1 sibling, 1 reply; 47+ messages in thread From: Alan Cox @ 2002-08-12 10:34 UTC (permalink / raw) To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On Mon, 2002-08-12 at 11:49, Ingo Molnar wrote: > but, couldnt APM use its own private GDT for real-mode calls, with 0x40 > filled in properly? That would pretty much decouple things. That would get extremely messy when handling interrupts arriving while in an APM bios call (which is required on many laptops). I believe the 0x40 = 0x40 assumption is identical across windows, buggy apm, buggy bios32, buggy edd, buggy .. (you get the picture) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:34 ` Alan Cox @ 2002-08-12 12:17 ` Ingo Molnar 2002-08-12 11:47 ` Alan Cox 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 12:17 UTC (permalink / raw) To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On 12 Aug 2002, Alan Cox wrote: > That would get extremely messy when handing interrupts arriving while in > an APM bios call (which is required on many laptops). I believe the 0x40 > = 0x40 assumption is identical across windows, buggy apm, buggy bios32, > buggy edd, buggy .. (you get the picture) ugh, we do Linux interrupts while in the APM BIOS? in any case, it should be possible to create a 'minimal GDT' for the APM BIOS [so that Linux interrupt handling is still possible] - to isolate it from Linux as much as possible. But i agree that this gets messy ... Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 12:17 ` Ingo Molnar @ 2002-08-12 11:47 ` Alan Cox 2002-08-12 12:55 ` Ingo Molnar 0 siblings, 1 reply; 47+ messages in thread From: Alan Cox @ 2002-08-12 11:47 UTC (permalink / raw) To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On Mon, 2002-08-12 at 13:17, Ingo Molnar wrote: > ugh, we do Linux interrupts while in the APM BIOS? We have to. Most APM bios expects interrupts to be happening. In pre-emptive mode we may well even be switching to/from APM BIOS code in 2.5 at the moment. I've not looked into that. ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 11:47 ` Alan Cox @ 2002-08-12 12:55 ` Ingo Molnar 2002-08-12 12:29 ` Alan Cox 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 12:55 UTC (permalink / raw) To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On 12 Aug 2002, Alan Cox wrote: > > ugh, we do Linux interrupts while in the APM BIOS? > > We have to. Most APM bios expects interrupts to be happening. In > pre-emptive mode we may well even be switching to/from APM BIOS code in > 2.5 at the moment. I've not looked into that. i think that since we hold the APM spinlock (do we always, when calling into the APM BIOS?), we should not preempt any APM BIOS code. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 12:55 ` Ingo Molnar @ 2002-08-12 12:29 ` Alan Cox 0 siblings, 0 replies; 47+ messages in thread From: Alan Cox @ 2002-08-12 12:29 UTC (permalink / raw) To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On Mon, 2002-08-12 at 13:55, Ingo Molnar wrote: > > On 12 Aug 2002, Alan Cox wrote: > > > > ugh, we do Linux interrupts while in the APM BIOS? > > > > We have to. Most APM bios expects interrupts to be happening. In > > pre-emptive mode we may well even be switching to/from APM BIOS code in > > 2.5 at the moment. I've not looked into that. > > i think that since we hold the APM spinlock (do we always, when calling > into the APM BIOS?), we should not preempt any APM BIOS code. Looking at the 2.5.29 tree I have handy here there is no APM spinlock. I don't have 2.5.30/31 unpacked to check those ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:49 ` Ingo Molnar 2002-08-12 10:34 ` Alan Cox @ 2002-08-12 10:35 ` Alan Cox 1 sibling, 0 replies; 47+ messages in thread From: Alan Cox @ 2002-08-12 10:35 UTC (permalink / raw) To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On Mon, 2002-08-12 at 11:49, Ingo Molnar wrote: > but, couldnt APM use its own private GDT for real-mode calls, with 0x40 > filled in properly? That would pretty much decouple things. Oh and secondly they are not actually real mode calls, they are protected mode 32bit calls with certain segment registers set up to point to specific things taken from the apm bios 32 interface ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:08 ` Alan Cox 2002-08-12 10:49 ` Ingo Molnar @ 2002-08-12 13:10 ` Kasper Dupont 2002-08-12 15:20 ` Ingo Molnar 2 siblings, 0 replies; 47+ messages in thread From: Kasper Dupont @ 2002-08-12 13:10 UTC (permalink / raw) To: Alan Cox Cc: Stephen Rothwell, Ingo Molnar, Linus Torvalds, linux-kernel, julliard, ldb Alan Cox wrote: > > For that > matter on Windows emulation I thought Windows also needed 0x40 to be the > same offset as the BIOS does so can't we leave it hardwired ? Does Wine and the BIOS actually want the same? I would believe there would have to be a small difference. Having Wine and BIOS using the same memory doesn't sound right to me. Wine wanting segment 0x40 to point to virtual address 0x400 and BIOS wanting segment 0x40 to point to physical address 0x400 sounds more reasonable to me. But physical address 0x400 would be virtual address 0xC0000400 with the default PAGE_OFFSET. -- Kasper Dupont -- der bruger for meget tid på usenet. For sending spam use mailto:aaarep@daimi.au.dk or mailto:mcxumhvenwblvtl@skrammel.yaboo.dk ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:08 ` Alan Cox 2002-08-12 10:49 ` Ingo Molnar 2002-08-12 13:10 ` Kasper Dupont @ 2002-08-12 15:20 ` Ingo Molnar 2 siblings, 0 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 15:20 UTC (permalink / raw) To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb On 12 Aug 2002, Alan Cox wrote: > Which does mean you can steal the old TLS value and put it back across > the calls just by changing the TLS data for that process. [...] yes - the 0x40 segment can be saved & restored safely. We have per-CPU GDTs so nobody can modify them while the APM BIOS is executing. (assuming preemption is disabled.) > [...] For that matter on Windows emulation I thought Windows also needed > 0x40 to be the same offset as the BIOS does so can't we leave it > hardwired ? another thing: do we want this with descriptor privilege level 3? Because the APM 0x40 GDT entry was a ring 0 descriptor, but that would not be accessible to Wine or DOSEMU. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 10:07 ` Ingo Molnar 2002-08-12 8:23 ` Stephen Rothwell @ 2002-08-12 14:46 ` Stephen Rothwell 1 sibling, 0 replies; 47+ messages in thread From: Stephen Rothwell @ 2002-08-12 14:46 UTC (permalink / raw) To: Ingo Molnar; +Cc: linux-kernel, Alan Cox On Mon, 12 Aug 2002 12:07:19 +0200 (CEST) Ingo Molnar <mingo@elte.hu> wrote: > > you can save/restore 0x40 in kernel-space if you need to no problem. How about the following (untested, not even compiled): -- Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ diff -ruN 2.5.31/arch/i386/kernel/apm.c 2.5.31-apm.1/arch/i386/kernel/apm.c --- 2.5.31/arch/i386/kernel/apm.c 2002-08-02 11:11:34.000000000 +1000 +++ 2.5.31-apm.1/arch/i386/kernel/apm.c 2002-08-13 00:20:56.000000000 +1000 @@ -215,6 +215,7 @@ #include <linux/pm.h> #include <linux/kernel.h> #include <linux/smp_lock.h> +#include <linux/smp.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -419,6 +420,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user * user_list; static spinlock_t user_list_lock = SPIN_LOCK_UNLOCKED; +static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; static char driver_version[] = "1.16"; /* no spaces */ @@ -569,7 +571,12 @@ { APM_DECL_SEGS unsigned long flags; + int cpu; + struct desc_struct save_desc_40; + cpu = get_cpu(); + save_desc_40 = cpu_gdt_table[cpu][0x40 / 8]; + cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; @@ -591,6 +598,8 @@ : "memory", "cc"); APM_DO_RESTORE_SEGS; local_irq_restore(flags); + cpu_gdt_table[cpu][0x40 / 8] = save_desc_40; + put_cpu(); return *eax & 0xff; } @@ -613,7 +622,12 @@ u8 error; APM_DECL_SEGS unsigned long flags; + int cpu; + struct desc_struct save_desc_40; + cpu = get_cpu(); + save_desc_40 = cpu_gdt_table[cpu][0x40 / 8]; + cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; @@ -639,6 +653,8 @@ } 
APM_DO_RESTORE_SEGS; local_irq_restore(flags); + cpu_gdt_table[smp_processor_id()][0x40 / 8] = save_desc_40; + put_cpu(); return error; } @@ -1923,17 +1939,14 @@ * that extends up to the end of page zero (that we have reserved). * This is for buggy BIOS's that refer to (real mode) segment 0x40 * even though they are called in protected mode. - * - * NOTE: on SMP we call into the APM BIOS only on CPU#0, so it's - * enough to modify CPU#0's GDT. */ - for (i = 0; i < NR_CPUS; i++) { - set_base(cpu_gdt_table[i][APM_40 >> 3], - __va((unsigned long)0x40 << 4)); - _set_limit((char *)&cpu_gdt_table[i][APM_40 >> 3], 4095 - (0x40 << 4)); + set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); + _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + + apm_bios_entry.offset = apm_info.bios.offset; + apm_bios_entry.segment = APM_CS; - apm_bios_entry.offset = apm_info.bios.offset; - apm_bios_entry.segment = APM_CS; + for (i = 0; i < NR_CPUS; i++) { set_base(cpu_gdt_table[i][APM_CS >> 3], __va((unsigned long)apm_info.bios.cseg << 4)); set_base(cpu_gdt_table[i][APM_CS_16 >> 3], diff -ruN 2.5.31/arch/i386/kernel/head.S 2.5.31-apm.1/arch/i386/kernel/head.S --- 2.5.31/arch/i386/kernel/head.S 2002-07-28 21:11:25.000000000 +1000 +++ 2.5.31-apm.1/arch/i386/kernel/head.S 2002-08-13 00:29:38.000000000 +1000 @@ -427,7 +427,10 @@ * The APM segments have byte granularity and their bases * and limits are set at run time. */ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ + .quad 0x0000000000000000 /* 0x40 APM will be used for bad BIOS's + * Will be saved and restored + * across BIOS calls. MUST NOT BE ONE + * OF THE FOLLOWING THREE! 
*/ .quad 0x00409a0000000000 /* 0x48 APM CS code */ .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ .quad 0x0040920000000000 /* 0x58 APM DS data */ diff -ruN 2.5.31/include/linux/apm_bios.h 2.5.31-apm.1/include/linux/apm_bios.h --- 2.5.31/include/linux/apm_bios.h 2001-08-14 09:39:28.000000000 +1000 +++ 2.5.31-apm.1/include/linux/apm_bios.h 2002-08-13 00:38:52.000000000 +1000 @@ -21,8 +21,7 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_CS 0x48 #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-11 21:46 ` [patch] tls-2.5.31-C3 Ingo Molnar 2002-08-12 7:34 ` Stephen Rothwell @ 2002-08-12 12:18 ` Luca Barbieri 2002-08-12 15:12 ` Ingo Molnar 2002-08-12 15:53 ` [patch] tls-2.5.31-D3 Ingo Molnar 2 siblings, 1 reply; 47+ messages in thread From: Luca Barbieri @ 2002-08-12 12:18 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard [-- Attachment #1: Type: text/plain, Size: 490 bytes --] > Comments? Numbers: unconditional copy of 2 tls descs: 5 cycles this patch with 1 tls desc: 26 cycles this patch with 8 tls descs: 52 cycles lldt: 51 cycles lgdt: 50 cycles context switch: 2000 cycles (measured with pipe read/write and vmstat so it's not very accurate) So this patch causes a 1% context switch performance drop for multithreaded applications. Note: the benchmark doesn't include the initial test for non-zero nr_tls_bytes and doesn't include setting the LDT descriptor [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 12:18 ` Luca Barbieri @ 2002-08-12 15:12 ` Ingo Molnar 2002-08-12 13:43 ` Luca Barbieri 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 15:12 UTC (permalink / raw) To: Luca Barbieri; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard On 12 Aug 2002, Luca Barbieri wrote: > Numbers: > unconditional copy of 2 tls descs: 5 cycles > this patch with 1 tls desc: 26 cycles > this patch with 8 tls descs: 52 cycles [ 0 tls descs: 2 cycles. ] but yes, this is roughly what i'd say this approach costs. > lldt: 51 cycles > lgdt: 50 cycles > context switch: 2000 cycles (measured with pipe read/write and vmstat so > it's not very accurate) > So this patch causes a 1% context switch performance drop for > multithreaded applications. how did you calculate this? glibc multithreaded applications can avoid the lldt via using the TLS, and thus it's a net win. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 15:12 ` Ingo Molnar @ 2002-08-12 13:43 ` Luca Barbieri 2002-08-12 15:57 ` Ingo Molnar 0 siblings, 1 reply; 47+ messages in thread From: Luca Barbieri @ 2002-08-12 13:43 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard [-- Attachment #1: Type: text/plain, Size: 1547 bytes --] On Mon, 2002-08-12 at 17:12, Ingo Molnar wrote: > > On 12 Aug 2002, Luca Barbieri wrote: > > > Numbers: > > unconditional copy of 2 tls descs: 5 cycles > > this patch with 1 tls desc: 26 cycles > > this patch with 8 tls descs: 52 cycles > > [ 0 tls descs: 2 cycles. ] Yes but common multithreaded applications will have at least 1 for pthreads. > but yes, this is rougly what i'd say this approach costs. > > > lldt: 51 cycles > > lgdt: 50 cycles > > context switch: 2000 cycles (measured with pipe read/write and vmstat so > > it's not very accurate) > > > So this patch causes a 1% context switch performance drop for > > multithreaded applications. > > how did you calculate this? ((26 - 5) / 2000) * 100 ~= 1 Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a Pentium3 running the rdtsc timed benchmark in a loop 1 million times with 8 unbenchmarked iterations to warm up caches and with the time to execute an empty benchmark subtracted. > glibc multithreaded applications can avoid the > lldt via using the TLS, and thus it's a net win. Surely, this patch is better than the old LDT method but much worse than the 2-TLS one. So I would use the 2-TLS approach plus my patch plus the syscall and segment.h improvements of the tls-2.5.31-C3 patch plus support for setting the 0x40 segment around APM calls. BTW, are there any programs that would benefit from having more than 2 user-settable GDT entries but that don't need more than about 8? 
(assuming we have a fixed flat code and data segment and 0x40 segment) [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 13:43 ` Luca Barbieri @ 2002-08-12 15:57 ` Ingo Molnar 2002-08-12 14:17 ` Luca Barbieri 0 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 15:57 UTC (permalink / raw) To: Luca Barbieri; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard On 12 Aug 2002, Luca Barbieri wrote: > > > Numbers: > > > unconditional copy of 2 tls descs: 5 cycles > > > this patch with 1 tls desc: 26 cycles > > > this patch with 8 tls descs: 52 cycles > > > > [ 0 tls descs: 2 cycles. ] > Yes but common multithreaded applications will have at least 1 for > pthreads. i would not say 'common' and 'multithreaded' in the same sentence. It might be so in the future, but it isnt today. > > how did you calculate this? > ((26 - 5) / 2000) * 100 ~= 1 > Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a > Pentium3 running the rdtsc timed benchmark in a loop 1 million times > with 8 unbenchmarked iterations to warm up caches and with the time to > execute an empty benchmark subtracted. old libpthreads or new one? > > glibc multithreaded applications can avoid the > > lldt via using the TLS, and thus it's a net win. > Surely, this patch is better than the old LDT method but much worse than > the 2-TLS one. people asked for a 3rd TLS already. Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-C3 2002-08-12 15:57 ` Ingo Molnar @ 2002-08-12 14:17 ` Luca Barbieri 0 siblings, 0 replies; 47+ messages in thread From: Luca Barbieri @ 2002-08-12 14:17 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard [-- Attachment #1: Type: text/plain, Size: 1414 bytes --] On Mon, 2002-08-12 at 17:57, Ingo Molnar wrote: > > On 12 Aug 2002, Luca Barbieri wrote: > > > > > Numbers: > > > > unconditional copy of 2 tls descs: 5 cycles > > > > this patch with 1 tls desc: 26 cycles > > > > this patch with 8 tls descs: 52 cycles > > > > > > [ 0 tls descs: 2 cycles. ] > > Yes but common multithreaded applications will have at least 1 for > > pthreads. > > i would not say 'common' and 'multithreaded' in the same sentence. It > might be so in the future, but it isnt today. Most modern servers (e.g. Apache2, MySQL) are multithreaded and so are large desktop applications (e.g. Evolution, Galeon, Nautilus). > > > how did you calculate this? > > ((26 - 5) / 2000) * 100 ~= 1 > > Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a > > Pentium3 running the rdtsc timed benchmark in a loop 1 million times > > with 8 unbenchmarked iterations to warm up caches and with the time to > > execute an empty benchmark subtracted. > > old libpthreads or new one? What are you asking about? (benchmarks are in kernel mode and context switch is from forked processes) > > > glibc multithreaded applications can avoid the > > > lldt via using the TLS, and thus it's a net win. > > Surely, this patch is better than the old LDT method but much worse than > > the 2-TLS one. > > people asked for a 3rd TLS already. It would be interesting to know what they would use it for. [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* [patch] tls-2.5.31-D3 2002-08-11 21:46 ` [patch] tls-2.5.31-C3 Ingo Molnar 2002-08-12 7:34 ` Stephen Rothwell 2002-08-12 12:18 ` Luca Barbieri @ 2002-08-12 15:53 ` Ingo Molnar 2002-08-12 16:13 ` [patch] tls-2.5.31-D4 Ingo Molnar 2 siblings, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 15:53 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri this is my latest TLS tree, changes relative to the 2.5.31-C3 patch: - streamlined GDT layout: * 0 - null * 1 - TLS segment #1 [ default user CS ] * 2 - TLS segment #2 [ default user DS ] * 3 - TLS segment #3 [ glibc's TLS segment ] * 4 - TLS segment #4 [ Wine's %fs Win32 segment ] * 5 - TLS segment #5 * 6 - TLS segment #6 * 7 - TLS segment #7 * 8 - APM BIOS support [ segment 0x40 ] * 9 - APM BIOS support * 10 - APM BIOS support * 11 - APM BIOS support * 12 - kernel code segment <==== new cacheline * 13 - kernel data segment * 14 - TSS * 15 - LDT * 16 - PNPBIOS support (16->32 gate) * 17 - PNPBIOS support * 18 - PNPBIOS support * 19 - PNPBIOS support * 20 - PNPBIOS support - simplified the TLS context-switch code, no more offsets, just a thread->private_tls flag tells whether the task has a non-default TLS. these two changes make the copying of the TLS faster as well - exactly 64 bytes need to be copied. Default memcpy() manages it in ~60 cycles, fully inlined memcpy code does it in ~30 cycles. I'm copying the NULL entry as well, to make the copy (and copy size) aligned on cacheline boundaries. The TLS area in the thread structure is not cacheline-aligned yet though. and the APM code should be back to functioning again. If a common 0x40 segment can be agreed on then the APM entry should be changed and made available to Wine - although i suspect Wine needs a 16-bit entry, while the APM one is a 32-bit entry ... 
Ingo --- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002 +++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 15:47:36 2002 @@ -90,7 +90,8 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) +#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) + #define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ #define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ #define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ --- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 15:47:36 2002 @@ -423,6 +423,7 @@ { int cpu = smp_processor_id(); struct tss_struct * t = init_tss + cpu; + struct thread_struct *thread = ¤t->thread; if (test_and_set_bit(cpu, &cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -447,9 +448,13 @@ */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; } + /* + * Set up the per-thread TLS descriptor cache: + */ + memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); @@ -468,9 +473,9 @@ BUG(); enter_lazy_tlb(&init_mm, current, cpu); - t->esp0 = current->thread.esp0; + t->esp0 = thread->esp0; set_tss_desc(cpu,t); - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); load_LDT(&init_mm.context); --- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002 +++ linux/arch/i386/kernel/entry.S Mon Aug 12 15:47:36 2002 @@ -753,6 +753,7 @@ .long sys_sched_setaffinity .long sys_sched_getaffinity .long sys_set_thread_area + .long sys_get_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- 
linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/head.S Mon Aug 12 15:47:36 2002 @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -412,17 +407,17 @@ ALIGN /* - * The Global Descriptor Table contains 20 quadwords, per-CPU. + * The Global Descriptor Table contains 28 quadwords, per-CPU. */ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x00cffa000000ffff /* 0x0b user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x13 user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x1b TLS entry 3 */ + .quad 0x0000000000000000 /* 0x23 TLS entry 4 */ + .quad 0x0000000000000000 /* 0x2b TLS entry 5 */ + .quad 0x0000000000000000 /* 0x33 TLS entry 6 */ + .quad 0x0000000000000000 /* 0x3b TLS entry 7 */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. 
@@ -431,15 +426,21 @@ .quad 0x00409a0000000000 /* 0x48 APM CS code */ .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ .quad 0x0040920000000000 /* 0x58 APM DS data */ + + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00c09a0000000000 /* 0x80 32-bit code */ + .quad 0x00809a0000000000 /* 0x88 16-bit code */ + .quad 0x0080920000000000 /* 0x90 16-bit data */ + .quad 0x0080920000000000 /* 0x98 16-bit data */ + .quad 0x0080920000000000 /* 0xa0 16-bit data */ + .quad 0x0000000000000000 /* 0xa8 not used */ + .quad 0x0000000000000000 /* 0xb0 not used */ + .quad 0x0000000000000000 /* 0xb8 not used */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ --- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002 +++ linux/arch/i386/kernel/process.c Mon Aug 12 15:47:36 2002 @@ -681,11 +681,9 @@ /* * Load the per-thread Thread-Local Storage descriptor. - * - * NOTE: it's faster to do the two stores unconditionally - * than to branch away. */ - load_TLS_desc(next, cpu); + if (prev->private_tls || next->private_tls) + load_TLS(prev, next, cpu); /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -834,35 +832,142 @@ #undef first_sched /* - * Set the Thread-Local Storage area: + * sys_alloc_thread_area: get a yet unused TLS descriptor index. 
 */ -asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags) +static int get_free_idx(void) { struct thread_struct *t = &current->thread; - int writable = 0; - int cpu; + int idx; - /* do not allow unused flags */ - if (flags & ~TLS_FLAGS_MASK) + for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++) + if (desc_empty(t->tls_array + idx)) + return idx; + return -ESRCH; +} + +static inline int private_tls(struct desc_struct *array) +{ + struct desc_struct *default_array = init_task.thread.tls_array; + + if (!memcmp(array, default_array, TLS_SIZE)) + return 0; + return 1; +} + +static inline int last_tls(struct desc_struct *array) +{ + struct desc_struct *default_array = init_task.thread.tls_array; + int idx; + + for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--) + if (!desc_equal(array + idx, default_array + idx)) + return idx; + + return 0; +} + +#define CHECK_TLS_IDX(idx) \ +do { \ + if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX) \ + BUG(); \ +} while (0) + +/* + * Set a given TLS descriptor: + */ +asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct thread_struct *t = &current->thread; + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - if (flags & TLS_FLAG_WRITABLE) - writable = 1; + desc = t->tls_array + idx; /* * We must not get preempted while modifying the TLS. 
*/ cpu = get_cpu(); - t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff; - - t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | - 0xf0000 | (writable << 9) | (1 << 15) | - (1 << 22) | (1 << 23) | 0x7000; + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + t->private_tls = private_tls(t->tls_array); + load_TLS(t, t, cpu); - load_TLS_desc(t, cpu); put_cpu(); - return TLS_ENTRY*8 + 3; + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 23) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; } --- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/suspend.c Mon Aug 12 15:47:37 2002 @@ -207,7 +207,7 
@@ struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(&current->mm->context); /* This does lldt */ --- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002 +++ linux/arch/i386/kernel/ldt.c Mon Aug 12 15:47:37 2002 @@ -200,32 +200,17 @@ /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { + if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; goto install; } } - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); /* Install the new entry ... */ install: --- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002 +++ linux/arch/i386/boot/setup.S Mon Aug 12 15:47:37 2002 @@ -1005,9 +1005,14 @@ ret # Descriptor tables +# +# NOTE: if you think the GDT is large, you can make it smaller by just +# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt +# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into +# the GDT, but those wont be used so it's not a problem. 
+# gdt: - .word 0, 0, 0, 0 # dummy - .word 0, 0, 0, 0 # unused + .fill GDT_ENTRY_KERNEL_CS,8,0 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) .word 0 # base address = 0 --- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002 +++ linux/include/linux/apm_bios.h Mon Aug 12 15:47:37 2002 @@ -21,8 +21,8 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8) +#define APM_CS (APM_BASE + 8) #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) --- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/desc.h Mon Aug 12 15:47:37 2002 @@ -2,50 +2,12 @@ #define __ARCH_DESC_H #include <asm/ldt.h> - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - Thread-Local Storage (TLS) segment - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - TSS - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline - * 9 - APM BIOS support - * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - not used - * 18 - not used - * 19 - not used - */ -#define TLS_ENTRY 1 -#define TSS_ENTRY 6 -#define LDT_ENTRY 7 -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. - * - * We pad the GDT to cacheline boundary. 
- */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 20 +#include <asm/segment.h> #ifndef __ASSEMBLY__ #include <asm/mmu.h> -#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) - extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { @@ -55,8 +17,8 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) -#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) /* * This is the ldt that every process will get unless we need @@ -78,21 +40,42 @@ static inline void set_tss_desc(unsigned int cpu, void *addr) { - _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89); } static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { - _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } -#define TLS_FLAGS_MASK 0x00000001 +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -#define TLS_FLAG_WRITABLE 0x00000001 +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ 
+ (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) -static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +static inline void load_TLS(struct thread_struct *prev, struct thread_struct *next, unsigned int cpu) { - cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; + memcpy(cpu_gdt_table[cpu], next->tls_array, TLS_SIZE); } static inline void clear_LDT(void) --- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/processor.h Mon Aug 12 15:47:37 2002 @@ -22,6 +22,11 @@ unsigned long a,b; }; +#define desc_empty(desc) \ + (!((desc)->a + (desc)->b)) + +#define desc_equal(desc1, desc2) \ + (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -376,8 +381,16 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; -/* TLS cached descriptor */ - struct desc_struct tls_desc; + + /* + * cached TLS descriptors. + * + * The offset calculation is needed to not copy the whole TLS + * into the local GDT all the time. + * We count offsets in bytes to reduce context-switch overhead. 
+ */ + int private_tls; + struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1]; }; #define INIT_THREAD { \ @@ -401,7 +414,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - LDT_ENTRY,0, /* ldt */ \ + GDT_ENTRY_LDT,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002 +++ linux/include/asm-i386/segment.h Mon Aug 12 15:47:37 2002 @@ -1,10 +1,79 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 1 - TLS segment #1 [ default user CS ] + * 2 - TLS segment #2 [ default user DS ] + * 3 - TLS segment #3 [ glibc's TLS segment ] + * 4 - TLS segment #4 [ Wine's %fs Win32 segment ] + * 5 - TLS segment #5 + * 6 - TLS segment #6 + * 7 - TLS segment #7 + * + * ------- start of kernel segments: + * + * 8 - APM BIOS support [ segment 0x40 ] + * 9 - APM BIOS support + * 10 - APM BIOS support + * 11 - APM BIOS support + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * 16 - PNPBIOS support (16->32 gate) + * 17 - PNPBIOS support + * 18 - PNPBIOS support + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - reserved + * 22 - reserved + * 23 - reserved + */ +#define GDT_ENTRY_TLS_ENTRIES 7 +#define GDT_ENTRY_TLS_MIN 1 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define TLS_SIZE (GDT_ENTRY_TLS_MAX * 8) + +#define GDT_ENTRY_DEFAULT_USER_CS (GDT_ENTRY_TLS_MIN + 0) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define GDT_ENTRY_DEFAULT_USER_DS (GDT_ENTRY_TLS_MIN + 1) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + + +#define GDT_ENTRY_KERNEL_BASE 8 + +#define GDT_ENTRY_APMBIOS_BASE 
(GDT_ENTRY_KERNEL_BASE + 0) +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 4) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 5) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 6) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 7) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8) + +/* + * The GDT has 21 entries but we pad it to cacheline boundary: + */ +#define GDT_ENTRIES 24 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. + */ +#define IDT_ENTRIES 256 #endif --- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/unistd.h Mon Aug 12 15:47:37 2002 @@ -248,6 +248,7 @@ #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ ^ permalink raw reply [flat|nested] 47+ messages in thread
* [patch] tls-2.5.31-D4 2002-08-12 15:53 ` [patch] tls-2.5.31-D3 Ingo Molnar @ 2002-08-12 16:13 ` Ingo Molnar 2002-08-12 14:32 ` Luca Barbieri 2002-08-12 17:06 ` [patch] tls-2.5.31-D5 Ingo Molnar 0 siblings, 2 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 16:13 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri the attached patch (against 2.5.31-vanilla) further reduces the number of TLS entries and optimizes the load_TLS() code, which is now down to 11 cycles. There are 3 more entries left around for cacheline alignment reasons, so we can use them just in case more TLSs are needed. this is in essence the '2 free TLS entries' code, with the difference of more flexibility and the ability to change the default CS and DS segments as well. Ingo --- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002 +++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 16:12:38 2002 @@ -90,7 +90,8 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) +#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) + #define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ #define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ #define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ --- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 16:12:38 2002 @@ -423,6 +423,7 @@ { int cpu = smp_processor_id(); struct tss_struct * t = init_tss + cpu; + struct thread_struct *thread = &current->thread; if (test_and_set_bit(cpu, &cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -447,9 +448,13 @@ */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; } + /* + * Set up the per-thread TLS descriptor cache: + */ + 
memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); @@ -468,9 +473,9 @@ BUG(); enter_lazy_tlb(&init_mm, current, cpu); - t->esp0 = current->thread.esp0; + t->esp0 = thread->esp0; set_tss_desc(cpu,t); - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); load_LDT(&init_mm.context); --- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002 +++ linux/arch/i386/kernel/entry.S Mon Aug 12 16:12:38 2002 @@ -753,6 +753,7 @@ .long sys_sched_setaffinity .long sys_sched_getaffinity .long sys_set_thread_area + .long sys_get_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/head.S Mon Aug 12 16:12:38 2002 @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -412,17 +407,17 @@ ALIGN /* - * The Global Descriptor Table contains 20 quadwords, per-CPU. + * The Global Descriptor Table contains 28 quadwords, per-CPU. 
*/ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x00cffa000000ffff /* 0x0b user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x13 user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x1b TLS entry 3 */ + .quad 0x0000000000000000 /* 0x23 TLS entry 4 */ + .quad 0x0000000000000000 /* 0x2b reserved */ + .quad 0x0000000000000000 /* 0x33 reserved */ + .quad 0x0000000000000000 /* 0x3b reserved */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. @@ -431,15 +426,21 @@ .quad 0x00409a0000000000 /* 0x48 APM CS code */ .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ .quad 0x0040920000000000 /* 0x58 APM DS data */ + + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00c09a0000000000 /* 0x80 32-bit code */ + .quad 0x00809a0000000000 /* 0x88 16-bit code */ + .quad 0x0080920000000000 /* 0x90 16-bit data */ + .quad 0x0080920000000000 /* 0x98 16-bit 
data */ + .quad 0x0080920000000000 /* 0xa0 16-bit data */ + .quad 0x0000000000000000 /* 0xa8 not used */ + .quad 0x0000000000000000 /* 0xb0 not used */ + .quad 0x0000000000000000 /* 0xb8 not used */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ --- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002 +++ linux/arch/i386/kernel/process.c Mon Aug 12 16:12:38 2002 @@ -681,11 +681,9 @@ /* * Load the per-thread Thread-Local Storage descriptor. - * - * NOTE: it's faster to do the two stores unconditionally - * than to branch away. */ - load_TLS_desc(next, cpu); + if (prev->private_tls || next->private_tls) + load_TLS(next, cpu); /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -834,35 +832,142 @@ #undef first_sched /* - * Set the Thread-Local Storage area: + * sys_alloc_thread_area: get a yet unused TLS descriptor index. */ -asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags) +static int get_free_idx(void) { struct thread_struct *t = &current->thread; - int writable = 0; - int cpu; + int idx; - /* do not allow unused flags */ - if (flags & ~TLS_FLAGS_MASK) + for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++) + if (desc_empty(t->tls_array + idx)) + return idx; + return -ESRCH; +} + +static inline int private_tls(struct desc_struct *array) +{ + struct desc_struct *default_array = init_task.thread.tls_array; + + if (!memcmp(array, default_array, TLS_SIZE)) + return 0; + return 1; +} + +static inline int last_tls(struct desc_struct *array) +{ + struct desc_struct *default_array = init_task.thread.tls_array; + int idx; + + for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--) + if (!desc_equal(array + idx, default_array + idx)) + return idx; + + return 0; +} + +#define CHECK_TLS_IDX(idx) \ +do { \ + if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX) \ + BUG(); \ +} while (0) + +/* + * Set a given TLS descriptor: + */ +asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s 
*u_info) +{ + struct thread_struct *t = &current->thread; + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - if (flags & TLS_FLAG_WRITABLE) - writable = 1; + desc = t->tls_array + idx; /* * We must not get preempted while modifying the TLS. */ cpu = get_cpu(); - t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff; - - t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | - 0xf0000 | (writable << 9) | (1 << 15) | - (1 << 22) | (1 << 23) | 0x7000; + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + t->private_tls = private_tls(t->tls_array); + load_TLS(t, cpu); - load_TLS_desc(t, cpu); put_cpu(); - return TLS_ENTRY*8 + 3; + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 23) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < 
GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; } --- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/suspend.c Mon Aug 12 16:12:38 2002 @@ -207,7 +207,7 @@ struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(&current->mm->context); /* This does lldt */ --- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002 +++ linux/arch/i386/kernel/ldt.c Mon Aug 12 16:12:38 2002 @@ -200,32 +200,17 @@ /* Allow LDTs to be cleared by the user. 
*/ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { + if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; goto install; } } - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); /* Install the new entry ... */ install: --- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002 +++ linux/arch/i386/boot/setup.S Mon Aug 12 16:12:38 2002 @@ -1005,9 +1005,14 @@ ret # Descriptor tables +# +# NOTE: if you think the GDT is large, you can make it smaller by just +# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt +# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into +# the GDT, but those wont be used so it's not a problem. 
+# gdt: - .word 0, 0, 0, 0 # dummy - .word 0, 0, 0, 0 # unused + .fill GDT_ENTRY_KERNEL_CS,8,0 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) .word 0 # base address = 0 --- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002 +++ linux/include/linux/apm_bios.h Mon Aug 12 16:12:38 2002 @@ -21,8 +21,8 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8) +#define APM_CS (APM_BASE + 8) #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) --- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/desc.h Mon Aug 12 16:12:38 2002 @@ -2,50 +2,12 @@ #define __ARCH_DESC_H #include <asm/ldt.h> - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - Thread-Local Storage (TLS) segment - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - TSS - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline - * 9 - APM BIOS support - * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - not used - * 18 - not used - * 19 - not used - */ -#define TLS_ENTRY 1 -#define TSS_ENTRY 6 -#define LDT_ENTRY 7 -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. - * - * We pad the GDT to cacheline boundary. 
- */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 20 +#include <asm/segment.h> #ifndef __ASSEMBLY__ #include <asm/mmu.h> -#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) - extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { @@ -55,8 +17,8 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) -#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) /* * This is the ldt that every process will get unless we need @@ -78,21 +40,48 @@ static inline void set_tss_desc(unsigned int cpu, void *addr) { - _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89); } static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { - _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } -#define TLS_FLAGS_MASK 0x00000001 +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -#define TLS_FLAG_WRITABLE 0x00000001 +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ 
+ (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#if TLS_SIZE != 32 +# error update this code. +#endif -static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { - cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; +#define C(i) cpu_gdt_table[cpu][i] = t->tls_array[i] + C(1); C(2); C(3); C(4); C(5); C(6); C(7); C(8); +#undef C } static inline void clear_LDT(void) --- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/processor.h Mon Aug 12 16:12:38 2002 @@ -22,6 +22,11 @@ unsigned long a,b; }; +#define desc_empty(desc) \ + (!((desc)->a + (desc)->b)) + +#define desc_equal(desc1, desc2) \ + (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -376,8 +381,16 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; -/* TLS cached descriptor */ - struct desc_struct tls_desc; + + /* + * cached TLS descriptors. + * + * The offset calculation is needed to not copy the whole TLS + * into the local GDT all the time. + * We count offsets in bytes to reduce context-switch overhead. 
+ */ + int private_tls; + struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1]; }; #define INIT_THREAD { \ @@ -401,7 +414,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - LDT_ENTRY,0, /* ldt */ \ + GDT_ENTRY_LDT,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002 +++ linux/include/asm-i386/segment.h Mon Aug 12 16:12:38 2002 @@ -1,10 +1,79 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 1 - TLS segment #1 [ default user CS ] + * 2 - TLS segment #2 [ default user DS ] + * 3 - TLS segment #3 [ glibc's TLS segment ] + * 4 - TLS segment #4 [ Wine's %fs Win32 segment ] + * + * ------- start of kernel segments: + * + * 5 - reserved + * 6 - reserved + * 7 - reserved + * 8 - APM BIOS support [ segment 0x40 ] + * 9 - APM BIOS support + * 10 - APM BIOS support + * 11 - APM BIOS support + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * 16 - PNPBIOS support (16->32 gate) + * 17 - PNPBIOS support + * 18 - PNPBIOS support + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - reserved + * 22 - reserved + * 23 - reserved + */ +#define GDT_ENTRY_TLS_ENTRIES 4 +#define GDT_ENTRY_TLS_MIN 1 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define TLS_SIZE (GDT_ENTRY_TLS_MAX * 8) + +#define GDT_ENTRY_DEFAULT_USER_CS (GDT_ENTRY_TLS_MIN + 0) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define GDT_ENTRY_DEFAULT_USER_DS (GDT_ENTRY_TLS_MIN + 1) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + + +#define GDT_ENTRY_KERNEL_BASE 8 + +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 0) +#define 
GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 4) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 5) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 6) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 7) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8) + +/* + * The GDT has 21 entries but we pad it to cacheline boundary: + */ +#define GDT_ENTRIES 24 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. + */ +#define IDT_ENTRIES 256 #endif --- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/unistd.h Mon Aug 12 16:12:38 2002 @@ -248,6 +248,7 @@ #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-D4 2002-08-12 16:13 ` [patch] tls-2.5.31-D4 Ingo Molnar @ 2002-08-12 14:32 ` Luca Barbieri 2002-08-12 17:06 ` [patch] tls-2.5.31-D5 Ingo Molnar 1 sibling, 0 replies; 47+ messages in thread From: Luca Barbieri @ 2002-08-12 14:32 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard [-- Attachment #1: Type: text/plain, Size: 738 bytes --] > the ability to change the default CS and DS segments > as well. This does not make any sense. The user is free to load any selector in %cs/%ds/%es/%ss so the default flat segments should be left alone so that a process can have the flat segments _plus_ all the tls entries. > although i suspect Wine needs a 16-bit entry, while > the APM one is a 32-bit entry ... AFAIK this only matters for code and stack segments and anyway the APM one should be a 16-bit entry since it exists because the BIOS wrongly assumes that it is a real-mode segment. Anyway, isn't it better to put the user segments in a cacheline that doesn't already lose one entry to the null selector? (and leave the first one either empty or for BIOS/boot selectors) [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* [patch] tls-2.5.31-D5 2002-08-12 16:13 ` [patch] tls-2.5.31-D4 Ingo Molnar 2002-08-12 14:32 ` Luca Barbieri @ 2002-08-12 17:06 ` Ingo Molnar 2002-08-12 15:21 ` Jakub Jelinek 2002-08-12 17:24 ` [patch] tls-2.5.31-D7 Ingo Molnar 1 sibling, 2 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 17:06 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri okay, the attached patch does some more things: - moves the first two TLS entries and the user CS/DS entries on the same cacheline. - excludes CS/DS from the TLS space - Luca is right in that it only slows things down unnecessarily, and there is nothing that cannot be done by changing the %ds %cs selectors - and every cycle counts in the context-switch path. the only open issue is the number of TLS entries supported. I'd vote for making them 4 and then we can inline the copy and make it unconditional, it will be 12 cycles to copy them all which alone is better than a branch miss. In this patch it's 2, thus the copying cost is 6 cycles. with 4 entries the 0x40 entry would be taken and APM has to move further up, and has to save/restore the 0x40 entry across BIOS calls. 
Ingo --- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002 +++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 17:01:11 2002 @@ -90,7 +90,8 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) +#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) + #define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ #define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ #define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ --- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 17:01:11 2002 @@ -423,6 +423,7 @@ { int cpu = smp_processor_id(); struct tss_struct * t = init_tss + cpu; + struct thread_struct *thread = ¤t->thread; if (test_and_set_bit(cpu, &cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -447,9 +448,13 @@ */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; } + /* + * Set up the per-thread TLS descriptor cache: + */ + memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); @@ -468,9 +473,9 @@ BUG(); enter_lazy_tlb(&init_mm, current, cpu); - t->esp0 = current->thread.esp0; + t->esp0 = thread->esp0; set_tss_desc(cpu,t); - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); load_LDT(&init_mm.context); --- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002 +++ linux/arch/i386/kernel/entry.S Mon Aug 12 17:01:11 2002 @@ -753,6 +753,7 @@ .long sys_sched_setaffinity .long sys_sched_getaffinity .long sys_set_thread_area + .long sys_get_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- 
linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/head.S Mon Aug 12 17:01:11 2002 @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -412,17 +407,17 @@ ALIGN /* - * The Global Descriptor Table contains 20 quadwords, per-CPU. + * The Global Descriptor Table contains 28 quadwords, per-CPU. */ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ + .quad 0x0000000000000000 /* 0x13 reserved */ + .quad 0x0000000000000000 /* 0x1b reserved */ + .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ + .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. 
@@ -431,15 +426,21 @@ .quad 0x00409a0000000000 /* 0x48 APM CS code */ .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ .quad 0x0040920000000000 /* 0x58 APM DS data */ + + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00c09a0000000000 /* 0x80 32-bit code */ + .quad 0x00809a0000000000 /* 0x88 16-bit code */ + .quad 0x0080920000000000 /* 0x90 16-bit data */ + .quad 0x0080920000000000 /* 0x98 16-bit data */ + .quad 0x0080920000000000 /* 0xa0 16-bit data */ + .quad 0x0000000000000000 /* 0xa8 not used */ + .quad 0x0000000000000000 /* 0xb0 not used */ + .quad 0x0000000000000000 /* 0xb8 not used */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ --- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002 +++ linux/arch/i386/kernel/process.c Mon Aug 12 17:01:11 2002 @@ -681,11 +681,9 @@ /* * Load the per-thread Thread-Local Storage descriptor. - * - * NOTE: it's faster to do the two stores unconditionally - * than to branch away. */ - load_TLS_desc(next, cpu); + if (prev->private_tls || next->private_tls) + load_TLS(next, cpu); /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -834,35 +832,125 @@ #undef first_sched /* - * Set the Thread-Local Storage area: + * sys_alloc_thread_area: get a yet unused TLS descriptor index. 
*/ -asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags) +static int get_free_idx(void) { struct thread_struct *t = ¤t->thread; - int writable = 0; - int cpu; + int idx; - /* do not allow unused flags */ - if (flags & ~TLS_FLAGS_MASK) + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (desc_empty(t->tls_array + idx)) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +static inline int private_tls(struct desc_struct *array) +{ + int idx; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (!desc_empty(array + idx)) + return 0; + return 1; +} + +/* + * Set a given TLS descriptor: + */ +asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct thread_struct *t = ¤t->thread; + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - if (flags & TLS_FLAG_WRITABLE) - writable = 1; + desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; /* * We must not get preempted while modifying the TLS. 
*/ cpu = get_cpu(); - t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff; - - t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | - 0xf0000 | (writable << 9) | (1 << 15) | - (1 << 22) | (1 << 23) | 0x7000; + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + t->private_tls = private_tls(t->tls_array); + load_TLS(t, cpu); - load_TLS_desc(t, cpu); put_cpu(); - return TLS_ENTRY*8 + 3; + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 23) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; } --- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/suspend.c Mon Aug 12 17:01:11 2002 
@@ -207,7 +207,7 @@ struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(&current->mm->context); /* This does lldt */ --- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002 +++ linux/arch/i386/kernel/ldt.c Mon Aug 12 17:01:11 2002 @@ -200,32 +200,17 @@ /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { + if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; goto install; } } - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); /* Install the new entry ... */ install: --- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002 +++ linux/arch/i386/boot/setup.S Mon Aug 12 17:01:11 2002 @@ -1005,9 +1005,14 @@ ret # Descriptor tables +# +# NOTE: if you think the GDT is large, you can make it smaller by just +# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt +# address down by GDT_ENTRY_KERNEL_CS*8. 
This puts bogus entries into +# the GDT, but those wont be used so it's not a problem. +# gdt: - .word 0, 0, 0, 0 # dummy - .word 0, 0, 0, 0 # unused + .fill GDT_ENTRY_KERNEL_CS,8,0 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) .word 0 # base address = 0 --- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002 +++ linux/include/linux/apm_bios.h Mon Aug 12 17:01:11 2002 @@ -21,8 +21,8 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8) +#define APM_CS (APM_BASE + 8) #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) --- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/desc.h Mon Aug 12 17:01:11 2002 @@ -2,50 +2,12 @@ #define __ARCH_DESC_H #include <asm/ldt.h> - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - Thread-Local Storage (TLS) segment - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - TSS - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline - * 9 - APM BIOS support - * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - not used - * 18 - not used - * 19 - not used - */ -#define TLS_ENTRY 1 -#define TSS_ENTRY 6 -#define LDT_ENTRY 7 -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. - * - * We pad the GDT to cacheline boundary. 
- */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 20 +#include <asm/segment.h> #ifndef __ASSEMBLY__ #include <asm/mmu.h> -#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) - extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { @@ -55,8 +17,8 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) -#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) /* * This is the ldt that every process will get unless we need @@ -78,21 +40,48 @@ static inline void set_tss_desc(unsigned int cpu, void *addr) { - _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89); } static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { - _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } -#define TLS_FLAGS_MASK 0x00000001 +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -#define TLS_FLAG_WRITABLE 0x00000001 +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ 
+ (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#if TLS_SIZE != 16 +# error update this code. +#endif -static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { - cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; +#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); +#undef C } static inline void clear_LDT(void) --- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/processor.h Mon Aug 12 17:01:11 2002 @@ -22,6 +22,11 @@ unsigned long a,b; }; +#define desc_empty(desc) \ + (!((desc)->a + (desc)->b)) + +#define desc_equal(desc1, desc2) \ + (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -376,8 +381,16 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; -/* TLS cached descriptor */ - struct desc_struct tls_desc; + + /* + * cached TLS descriptors. + * + * The offset calculation is needed to not copy the whole TLS + * into the local GDT all the time. + * We count offsets in bytes to reduce context-switch overhead. 
+ */ + int private_tls; + struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1]; }; #define INIT_THREAD { \ @@ -401,7 +414,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - LDT_ENTRY,0, /* ldt */ \ + GDT_ENTRY_LDT,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002 +++ linux/include/asm-i386/segment.h Mon Aug 12 17:01:11 2002 @@ -1,10 +1,79 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - default user CS <==== new cacheline + * 5 - default user DS + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * + * ------- start of kernel segments: + * + * 8 - APM BIOS support [ segment 0x40 ] + * 9 - APM BIOS support + * 10 - APM BIOS support + * 11 - APM BIOS support + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * 16 - PNPBIOS support (16->32 gate) + * 17 - PNPBIOS support + * 18 - PNPBIOS support + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - reserved + * 22 - reserved + * 23 - reserved + */ +#define GDT_ENTRY_TLS_ENTRIES 2 +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +#define GDT_ENTRY_DEFAULT_USER_CS 4 +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + +#define GDT_ENTRY_KERNEL_BASE 8 + +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 0) +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 4) +#define 
__KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 5) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 6) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 7) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8) + +/* + * The GDT has 21 entries but we pad it to cacheline boundary: + */ +#define GDT_ENTRIES 24 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. + */ +#define IDT_ENTRIES 256 #endif --- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/unistd.h Mon Aug 12 17:01:11 2002 @@ -248,6 +248,7 @@ #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-D5 2002-08-12 17:06 ` [patch] tls-2.5.31-D5 Ingo Molnar @ 2002-08-12 15:21 ` Jakub Jelinek 2002-08-12 17:41 ` Ingo Molnar 2002-08-12 17:24 ` [patch] tls-2.5.31-D7 Ingo Molnar 1 sibling, 1 reply; 47+ messages in thread From: Jakub Jelinek @ 2002-08-12 15:21 UTC (permalink / raw) To: Ingo Molnar Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri On Mon, Aug 12, 2002 at 07:06:50PM +0200, Ingo Molnar wrote: > > okay, the attached patch does some more things: > > - moves the first two TLS entries and the user CS/DS entries on the same > cacheline. > > - excludes CS/DS from the TLS space - Luca is right in that it only slows > things down unnecessarily, and there is nothing that cannot be done by > changing the %ds %cs selectors - and every cycle counts in the > context-switch path. > > the only open issues are the number of TLSs supported. I'd vote for making > them 4 and then we can inline the copy and make it unconditional, it will > be 12 cycles to copy them all which alone is better than a branch miss. In > this patch it's 2, thus the copying cost is 6 cycles. > > with 4 entries the 0x40 entry would be taken and APM has to move further > up, and has to save/restore the 0x40 entry across BIOS calls. As each supported TLS entry has its context-switch time cost, I think we should stay at 2 supported TLS entries. My understanding was that the GDT patches were written to optimize the common case (all threaded apps using LDT and with the advent of __thread support causing every single application to use LDT), with 2 TLS entries where one is for libc/libpthread and the other one is for application usage I think it is enough for 99.9% of apps. In the rare case someone needs more, there is still LDT which offers 8192 entries. Jakub ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-D5 2002-08-12 15:21 ` Jakub Jelinek @ 2002-08-12 17:41 ` Ingo Molnar 2002-08-12 15:54 ` Luca Barbieri ` (2 more replies) 0 siblings, 3 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 17:41 UTC (permalink / raw) To: Jakub Jelinek Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri On Mon, 12 Aug 2002, Jakub Jelinek wrote: > As each supported TLS entry has its context-switch time cost, I think we > should stay at 2 supported TLS entries. 4 are almost as good - and they also solve the 0x40 problem. > My understanding was that the GDT patches were written to optimize the > common case (all threaded apps using LDT and with the advent of __thread > support causing every single application to use LDT), with 2 TLS entries > where one is for libc/libpthread and the other one is for application > usage I think it is enough for 99.9% of apps. In the rare case someone > needs more, there is still LDT which offers 8192 entries. well, i think i have to agree ... if it wasn't for Wine's 0x40 descriptor. But it certainly does not come free. We could have 3 TLS entries (0x40 will be the last entry), and the copying cost is 9 cycles. (compared to 6 cycles in the 2 entries case.) Good enough? Ingo ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-D5 2002-08-12 17:41 ` Ingo Molnar @ 2002-08-12 15:54 ` Luca Barbieri 2002-08-12 18:03 ` [patch] tls-2.5.31-D9 Ingo Molnar 2002-08-13 1:50 ` [patch] tls-2.5.31-D5 Alexandre Julliard 2 siblings, 0 replies; 47+ messages in thread From: Luca Barbieri @ 2002-08-12 15:54 UTC (permalink / raw) To: Ingo Molnar Cc: Jakub Jelinek, Linus Torvalds, Linux-Kernel ML, Alexandre Julliard [-- Attachment #1: Type: text/plain, Size: 694 bytes --] > well, i think i have to agree ... if it wasn't for Wine's 0x40 descriptor. > But it certainly does not come free. We could have 3 TLS entries (0x40 > will be the last entry), and the copying cost is 9 cycles. (compared to 6 > cycles in the 2 entries case.) Good enough? Or we could leave 0x40 fixed to 0x400 and use only 2. This loses flexibility but anyway the only 2 apps that could use it are dosemu and wine and I think that they already need to have it mapped at 0x400 for vm86 (no one uses 16-bit DLLs anymore). Of course this is only valid if Win32 doesn't use it because otherwise we would lose the ability to do null-pointer checking in programs using Win32 DLLs (e.g. mplayer). [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 47+ messages in thread
* [patch] tls-2.5.31-D9 2002-08-12 17:41 ` Ingo Molnar 2002-08-12 15:54 ` Luca Barbieri @ 2002-08-12 18:03 ` Ingo Molnar 2002-08-13 1:50 ` [patch] tls-2.5.31-D5 Alexandre Julliard 2 siblings, 0 replies; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 18:03 UTC (permalink / raw) To: Jakub Jelinek Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri, Christoph Hellwig okay, here is YAGL. (Yet Another GDT Layout) 3 TLS entries, 9 cycles copying and no branches in the context-switch path. The patch also adds Christoph's suggestion and renames modify_ldt_ldt_s (yuck!) to user_desc. (all patches i posted were test-compiled and test-booted against 2.5.31-vanilla.) Ingo --- linux/drivers/pnp/pnpbios_core.c.orig Mon Aug 12 17:51:27 2002 +++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 17:56:27 2002 @@ -90,7 +90,8 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) +#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) + #define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ #define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ #define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ --- linux/arch/i386/kernel/cpu/common.c.orig Mon Aug 12 17:56:01 2002 +++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 17:56:27 2002 @@ -423,6 +423,7 @@ { int cpu = smp_processor_id(); struct tss_struct * t = init_tss + cpu; + struct thread_struct *thread = &current->thread; if (test_and_set_bit(cpu, &cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -447,9 +448,13 @@ */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; } + /* + * Set up the per-thread TLS descriptor cache: + */ + memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); 
__asm__ __volatile__("lidt %0": "=m" (idt_descr)); @@ -468,9 +473,9 @@ BUG(); enter_lazy_tlb(&init_mm, current, cpu); - t->esp0 = current->thread.esp0; + t->esp0 = thread->esp0; set_tss_desc(cpu,t); - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); load_LDT(&init_mm.context); --- linux/arch/i386/kernel/entry.S.orig Mon Aug 12 17:56:02 2002 +++ linux/arch/i386/kernel/entry.S Mon Aug 12 17:56:27 2002 @@ -753,6 +753,7 @@ .long sys_sched_setaffinity .long sys_sched_getaffinity .long sys_set_thread_area + .long sys_get_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- linux/arch/i386/kernel/head.S.orig Mon Aug 12 17:56:02 2002 +++ linux/arch/i386/kernel/head.S Mon Aug 12 17:56:27 2002 @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -412,34 +407,40 @@ ALIGN /* - * The Global Descriptor Table contains 20 quadwords, per-CPU. + * The Global Descriptor Table contains 28 quadwords, per-CPU. 
*/ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ + .quad 0x0000000000000000 /* 0x13 reserved */ + .quad 0x0000000000000000 /* 0x1b reserved */ + .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ + .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ + .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ + .quad 0x0000000000000000 /* 0x4b reserved */ + .quad 0x0000000000000000 /* 0x53 reserved */ + .quad 0x0000000000000000 /* 0x5b reserved */ + + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + + /* Segments used for calling PnP BIOS */ + .quad 0x00c09a0000000000 /* 0x80 32-bit code */ + .quad 0x00809a0000000000 /* 0x88 16-bit code */ + .quad 0x0080920000000000 /* 0x90 16-bit data */ + .quad 0x0080920000000000 /* 0x98 16-bit data */ + .quad 0x0080920000000000 /* 0xa0 16-bit data */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. 
*/ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ - .quad 0x00409a0000000000 /* 0x48 APM CS code */ - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0x58 APM DS data */ - /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00409a0000000000 /* 0xa8 APM CS code */ + .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */ + .quad 0x0040920000000000 /* 0xb8 APM DS data */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ --- linux/arch/i386/kernel/process.c.orig Mon Aug 12 17:56:02 2002 +++ linux/arch/i386/kernel/process.c Mon Aug 12 17:56:27 2002 @@ -681,11 +681,8 @@ /* * Load the per-thread Thread-Local Storage descriptor. - * - * NOTE: it's faster to do the two stores unconditionally - * than to branch away. */ - load_TLS_desc(next, cpu); + load_TLS(next, cpu); /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -834,35 +831,114 @@ #undef first_sched /* - * Set the Thread-Local Storage area: + * sys_alloc_thread_area: get a yet unused TLS descriptor index. 
*/ -asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags) +static int get_free_idx(void) { struct thread_struct *t = &current->thread; - int writable = 0; - int cpu; + int idx; - /* do not allow unused flags */ - if (flags & ~TLS_FLAGS_MASK) + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (desc_empty(t->tls_array + idx)) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +/* + * Set a given TLS descriptor: + */ +asmlinkage int sys_set_thread_area(struct user_desc *u_info) +{ + struct thread_struct *t = &current->thread; + struct user_desc info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - if (flags & TLS_FLAG_WRITABLE) - writable = 1; + desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; /* * We must not get preempted while modifying the TLS. 
*/ cpu = get_cpu(); - t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff; - - t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | - 0xf0000 | (writable << 9) | (1 << 15) | - (1 << 22) | (1 << 23) | 0x7000; + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + load_TLS(t, cpu); - load_TLS_desc(t, cpu); put_cpu(); - return TLS_ENTRY*8 + 3; + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 23) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int sys_get_thread_area(struct user_desc *u_info) +{ + struct user_desc info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; } --- linux/arch/i386/kernel/suspend.c.orig Mon Aug 12 17:56:02 2002 +++ linux/arch/i386/kernel/suspend.c Mon Aug 12 17:56:27 2002 @@ -207,7 +207,7 @@ struct tss_struct * t = init_tss + cpu; 
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(&current->mm->context); /* This does lldt */ --- linux/arch/i386/kernel/ldt.c.orig Mon Aug 12 17:56:02 2002 +++ linux/arch/i386/kernel/ldt.c Mon Aug 12 17:56:27 2002 @@ -170,7 +170,7 @@ struct mm_struct * mm = current->mm; __u32 entry_1, entry_2, *lp; int error; - struct modify_ldt_ldt_s ldt_info; + struct user_desc ldt_info; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -200,32 +200,17 @@ /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { + if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; goto install; } } - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); /* Install the new entry ... 
*/ install: --- linux/arch/i386/boot/setup.S.orig Mon Aug 12 17:51:32 2002 +++ linux/arch/i386/boot/setup.S Mon Aug 12 17:56:27 2002 @@ -1005,9 +1005,14 @@ ret # Descriptor tables +# +# NOTE: if you think the GDT is large, you can make it smaller by just +# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt +# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into +# the GDT, but those wont be used so it's not a problem. +# gdt: - .word 0, 0, 0, 0 # dummy - .word 0, 0, 0, 0 # unused + .fill GDT_ENTRY_KERNEL_CS,8,0 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) .word 0 # base address = 0 --- linux/include/linux/apm_bios.h.orig Mon Aug 12 17:51:39 2002 +++ linux/include/linux/apm_bios.h Mon Aug 12 17:56:27 2002 @@ -21,8 +21,8 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8) +#define APM_CS (APM_BASE + 8) #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) --- linux/include/asm-i386/desc.h.orig Mon Aug 12 17:56:15 2002 +++ linux/include/asm-i386/desc.h Mon Aug 12 17:56:27 2002 @@ -2,50 +2,12 @@ #define __ARCH_DESC_H #include <asm/ldt.h> - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - Thread-Local Storage (TLS) segment - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - TSS - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline - * 9 - APM BIOS support - * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - not used - * 18 - not used - * 19 - not used - */ -#define TLS_ENTRY 1 -#define TSS_ENTRY 6 -#define LDT_ENTRY 7 -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. 
- * - * We pad the GDT to cacheline boundary. - */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 20 +#include <asm/segment.h> #ifndef __ASSEMBLY__ #include <asm/mmu.h> -#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) - extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { @@ -55,8 +17,8 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) -#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) /* * This is the ldt that every process will get unless we need @@ -78,21 +40,48 @@ static inline void set_tss_desc(unsigned int cpu, void *addr) { - _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89); } static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { - _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } -#define TLS_FLAGS_MASK 0x00000001 +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -#define TLS_FLAG_WRITABLE 0x00000001 +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit 
== 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#if TLS_SIZE != 24 +# error update this code. +#endif -static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { - cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; +#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); C(2); +#undef C } static inline void clear_LDT(void) --- linux/include/asm-i386/processor.h.orig Mon Aug 12 17:56:16 2002 +++ linux/include/asm-i386/processor.h Mon Aug 12 17:56:27 2002 @@ -22,6 +22,11 @@ unsigned long a,b; }; +#define desc_empty(desc) \ + (!((desc)->a + (desc)->b)) + +#define desc_equal(desc1, desc2) \ + (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -359,6 +364,8 @@ }; struct thread_struct { +/* cached TLS descriptors. */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; unsigned long esp0; unsigned long eip; unsigned long esp; @@ -376,11 +383,10 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; -/* TLS cached descriptor */ - struct desc_struct tls_desc; }; #define INIT_THREAD { \ + { { 0, 0 } , }, \ 0, \ 0, 0, 0, 0, \ { [0 ... 
7] = 0 }, /* debugging registers */ \ @@ -401,7 +407,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - LDT_ENTRY,0, /* ldt */ \ + GDT_ENTRY_LDT,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/segment.h.orig Mon Aug 12 17:56:16 2002 +++ linux/include/asm-i386/segment.h Mon Aug 12 17:56:27 2002 @@ -1,10 +1,79 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - default user CS <==== new cacheline + * 5 - default user DS + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * 8 - TLS segment #3 + * 9 - reserved + * 10 - reserved + * 11 - reserved + * + * ------- start of kernel segments: + * + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * 16 - PNPBIOS support (16->32 gate) + * 17 - PNPBIOS support + * 18 - PNPBIOS support + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - APM BIOS support + * 22 - APM BIOS support + * 23 - APM BIOS support + */ +#define GDT_ENTRY_TLS_ENTRIES 3 +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +#define GDT_ENTRY_DEFAULT_USER_CS 4 +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + +#define GDT_ENTRY_KERNEL_BASE 12 + +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) 
+ +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4) +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 9) + +/* + * The GDT has 21 entries but we pad it to cacheline boundary: + */ +#define GDT_ENTRIES 24 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. + */ +#define IDT_ENTRIES 256 #endif --- linux/include/asm-i386/unistd.h.orig Mon Aug 12 17:56:16 2002 +++ linux/include/asm-i386/unistd.h Mon Aug 12 17:56:27 2002 @@ -248,6 +248,7 @@ #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ --- linux/include/asm-i386/ldt.h.orig Mon Aug 12 17:56:16 2002 +++ linux/include/asm-i386/ldt.h Mon Aug 12 17:56:27 2002 @@ -12,7 +12,7 @@ #define LDT_ENTRY_SIZE 8 #ifndef __ASSEMBLY__ -struct modify_ldt_ldt_s { +struct user_desc { unsigned int entry_number; unsigned long base_addr; unsigned int limit; ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-D5 2002-08-12 17:41 ` Ingo Molnar 2002-08-12 15:54 ` Luca Barbieri 2002-08-12 18:03 ` [patch] tls-2.5.31-D9 Ingo Molnar @ 2002-08-13 1:50 ` Alexandre Julliard 2 siblings, 0 replies; 47+ messages in thread From: Alexandre Julliard @ 2002-08-13 1:50 UTC (permalink / raw) To: Ingo Molnar; +Cc: Jakub Jelinek, Linus Torvalds, linux-kernel, Luca Barbieri Ingo Molnar <mingo@elte.hu> writes: > well, i think i have to agree ... if it wasn't for Wine's 0x40 descriptor. > But it certainly does not come free. We could have 3 TLS entries (0x40 > will be the last entry), and the copying cost is 9 cycles. (compared to 6 > cycles in the 2 entries case.) Good enough? Note that Wine doesn't really require the 0x40 descriptor. As long as we can trap accesses to it and emulate them like we do now, that's good enough. Of course having a GDT entry would save a few cycles, but this only matters for old Win16 apps, so I'm not sure adding even 1 cycle to the task switch time is worth it. -- Alexandre Julliard julliard@winehq.com ^ permalink raw reply [flat|nested] 47+ messages in thread
* [patch] tls-2.5.31-D7 2002-08-12 17:06 ` [patch] tls-2.5.31-D5 Ingo Molnar 2002-08-12 15:21 ` Jakub Jelinek @ 2002-08-12 17:24 ` Ingo Molnar 2002-08-12 15:45 ` Christoph Hellwig 1 sibling, 1 reply; 47+ messages in thread From: Ingo Molnar @ 2002-08-12 17:24 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri > the only open issues are the number of TLSs supported. I'd vote for > making them 4 and then we can inline the copy and make it unconditional, > it will be 12 cycles to copy them all which alone is better than a > branch miss. In this patch it's 2, thus the copying cost is 6 cycles. > > with 4 entries the 0x40 entry would be taken and APM has to move further > up, and has to save/restore the 0x40 entry across BIOS calls. the attached patch does this: - there are now 4 freely usable TLS entries, amongst them 0x40 for Wine - the 3 APM segments fit into the hole at the end of the kernel descriptor area exactly => no GDT size increase. - the ->private_tls code is gone - unconditional inline copies are more robust and faster as well. Plus the APM code needs Stephen's fix. I think this is the best approach we had so far. Any objections? 
Ingo --- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002 +++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 17:21:29 2002 @@ -90,7 +90,8 @@ static union pnp_bios_expansion_header * pnp_bios_hdr = NULL; /* The PnP BIOS entries in the GDT */ -#define PNP_GDT (0x0060) +#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) + #define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ #define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ #define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ --- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 17:21:29 2002 @@ -423,6 +423,7 @@ { int cpu = smp_processor_id(); struct tss_struct * t = init_tss + cpu; + struct thread_struct *thread = ¤t->thread; if (test_and_set_bit(cpu, &cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -447,9 +448,13 @@ */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; } + /* + * Set up the per-thread TLS descriptor cache: + */ + memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); @@ -468,9 +473,9 @@ BUG(); enter_lazy_tlb(&init_mm, current, cpu); - t->esp0 = current->thread.esp0; + t->esp0 = thread->esp0; set_tss_desc(cpu,t); - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); load_LDT(&init_mm.context); --- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002 +++ linux/arch/i386/kernel/entry.S Mon Aug 12 17:21:29 2002 @@ -753,6 +753,7 @@ .long sys_sched_setaffinity .long sys_sched_getaffinity .long sys_set_thread_area + .long sys_get_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- 
linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/head.S Mon Aug 12 17:21:29 2002 @@ -239,12 +239,7 @@ movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - movl $(__KERNEL_DS), %eax - movl %eax,%ss # Reload the stack pointer (segment only) -#else - lss stack_start,%esp # Load processor stack -#endif + movl %eax,%ss xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times @@ -412,34 +407,40 @@ ALIGN /* - * The Global Descriptor Table contains 20 quadwords, per-CPU. + * The Global Descriptor Table contains 28 quadwords, per-CPU. */ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* TLS descriptor */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* TSS descriptor */ - .quad 0x0000000000000000 /* LDT descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ + .quad 0x0000000000000000 /* 0x13 reserved */ + .quad 0x0000000000000000 /* 0x1b reserved */ + .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ + .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ + .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ + .quad 0x0000000000000000 /* 0x4b TLS entry 4 */ + .quad 0x0000000000000000 /* 0x53 reserved */ + .quad 0x0000000000000000 /* 0x5b reserved */ + + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* 0x70 TSS descriptor */ + .quad 0x0000000000000000 /* 0x78 LDT descriptor */ + + /* Segments used for calling PnP BIOS */ + .quad 0x00c09a0000000000 /* 0x80 32-bit code */ + 
.quad 0x00809a0000000000 /* 0x88 16-bit code */ + .quad 0x0080920000000000 /* 0x90 16-bit data */ + .quad 0x0080920000000000 /* 0x98 16-bit data */ + .quad 0x0080920000000000 /* 0xa0 16-bit data */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. */ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ - .quad 0x00409a0000000000 /* 0x48 APM CS code */ - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0x58 APM DS data */ - /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x60 32-bit code */ - .quad 0x00809a0000000000 /* 0x68 16-bit code */ - .quad 0x0080920000000000 /* 0x70 16-bit data */ - .quad 0x0080920000000000 /* 0x78 16-bit data */ - .quad 0x0080920000000000 /* 0x80 16-bit data */ - .quad 0x0000000000000000 /* 0x88 not used */ - .quad 0x0000000000000000 /* 0x90 not used */ - .quad 0x0000000000000000 /* 0x98 not used */ + .quad 0x00409a0000000000 /* 0xa8 APM CS code */ + .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */ + .quad 0x0040920000000000 /* 0xb8 APM DS data */ #if CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ --- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002 +++ linux/arch/i386/kernel/process.c Mon Aug 12 17:21:29 2002 @@ -681,11 +681,8 @@ /* * Load the per-thread Thread-Local Storage descriptor. - * - * NOTE: it's faster to do the two stores unconditionally - * than to branch away. */ - load_TLS_desc(next, cpu); + load_TLS(next, cpu); /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -834,35 +831,114 @@ #undef first_sched /* - * Set the Thread-Local Storage area: + * sys_alloc_thread_area: get a yet unused TLS descriptor index. 
*/ -asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags) +static int get_free_idx(void) { struct thread_struct *t = ¤t->thread; - int writable = 0; - int cpu; + int idx; - /* do not allow unused flags */ - if (flags & ~TLS_FLAGS_MASK) + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (desc_empty(t->tls_array + idx)) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +/* + * Set a given TLS descriptor: + */ +asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct thread_struct *t = ¤t->thread; + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - if (flags & TLS_FLAG_WRITABLE) - writable = 1; + desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; /* * We must not get preempted while modifying the TLS. 
*/ cpu = get_cpu(); - t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff; - - t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | - 0xf0000 | (writable << 9) | (1 << 15) | - (1 << 22) | (1 << 23) | 0x7000; + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + load_TLS(t, cpu); - load_TLS_desc(t, cpu); put_cpu(); - return TLS_ENTRY*8 + 3; + return 0; +} + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 23) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) + +asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info) +{ + struct modify_ldt_ldt_s info; + struct desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; + + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; } --- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002 +++ linux/arch/i386/kernel/suspend.c Mon Aug 12 17:21:29 2002 @@ -207,7 +207,7 @@ struct tss_struct * t = 
init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(¤t->mm->context); /* This does lldt */ --- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002 +++ linux/arch/i386/kernel/ldt.c Mon Aug 12 17:21:29 2002 @@ -200,32 +200,17 @@ /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { + if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; goto install; } } - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); /* Install the new entry ... */ install: --- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002 +++ linux/arch/i386/boot/setup.S Mon Aug 12 17:21:29 2002 @@ -1005,9 +1005,14 @@ ret # Descriptor tables +# +# NOTE: if you think the GDT is large, you can make it smaller by just +# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt +# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into +# the GDT, but those wont be used so it's not a problem. 
+# gdt: - .word 0, 0, 0, 0 # dummy - .word 0, 0, 0, 0 # unused + .fill GDT_ENTRY_KERNEL_CS,8,0 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) .word 0 # base address = 0 --- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002 +++ linux/include/linux/apm_bios.h Mon Aug 12 17:21:29 2002 @@ -21,8 +21,8 @@ #ifdef __KERNEL__ -#define APM_40 0x40 -#define APM_CS (APM_40 + 8) +#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8) +#define APM_CS (APM_BASE + 8) #define APM_CS_16 (APM_CS + 8) #define APM_DS (APM_CS_16 + 8) --- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/desc.h Mon Aug 12 17:21:29 2002 @@ -2,50 +2,12 @@ #define __ARCH_DESC_H #include <asm/ldt.h> - -/* - * The layout of the per-CPU GDT under Linux: - * - * 0 - null - * 1 - Thread-Local Storage (TLS) segment - * 2 - kernel code segment - * 3 - kernel data segment - * 4 - user code segment <==== new cacheline - * 5 - user data segment - * 6 - TSS - * 7 - LDT - * 8 - APM BIOS support <==== new cacheline - * 9 - APM BIOS support - * 10 - APM BIOS support - * 11 - APM BIOS support - * 12 - PNPBIOS support <==== new cacheline - * 13 - PNPBIOS support - * 14 - PNPBIOS support - * 15 - PNPBIOS support - * 16 - PNPBIOS support <==== new cacheline - * 17 - not used - * 18 - not used - * 19 - not used - */ -#define TLS_ENTRY 1 -#define TSS_ENTRY 6 -#define LDT_ENTRY 7 -/* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. - * - * We pad the GDT to cacheline boundary. 
- */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 20 +#include <asm/segment.h> #ifndef __ASSEMBLY__ #include <asm/mmu.h> -#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) - extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { @@ -55,8 +17,8 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) -#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) /* * This is the ldt that every process will get unless we need @@ -78,21 +40,48 @@ static inline void set_tss_desc(unsigned int cpu, void *addr) { - _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89); } static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { - _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); + _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); } -#define TLS_FLAGS_MASK 0x00000001 +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -#define TLS_FLAG_WRITABLE 0x00000001 +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ 
+ (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#if TLS_SIZE != 32 +# error update this code. +#endif -static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { - cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; +#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); C(2); C(3); +#undef C } static inline void clear_LDT(void) --- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/processor.h Mon Aug 12 17:21:29 2002 @@ -22,6 +22,11 @@ unsigned long a,b; }; +#define desc_empty(desc) \ + (!((desc)->a + (desc)->b)) + +#define desc_equal(desc1, desc2) \ + (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b)) /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -359,6 +364,8 @@ }; struct thread_struct { +/* cached TLS descriptors. */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; unsigned long esp0; unsigned long eip; unsigned long esp; @@ -376,11 +383,10 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; -/* TLS cached descriptor */ - struct desc_struct tls_desc; }; #define INIT_THREAD { \ + { { 0, 0 } , }, \ 0, \ 0, 0, 0, 0, \ { [0 ... 
7] = 0 }, /* debugging registers */ \ @@ -401,7 +407,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - LDT_ENTRY,0, /* ldt */ \ + GDT_ENTRY_LDT,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002 +++ linux/include/asm-i386/segment.h Mon Aug 12 17:21:29 2002 @@ -1,10 +1,79 @@ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +/* + * The layout of the per-CPU GDT under Linux: + * + * 0 - null + * 1 - reserved + * 2 - reserved + * 3 - reserved + * + * 4 - default user CS <==== new cacheline + * 5 - default user DS + * + * ------- start of TLS (Thread-Local Storage) segments: + * + * 6 - TLS segment #1 [ glibc's TLS segment ] + * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] + * 8 - TLS segment #3 + * 9 - TLS segment #4 + * 10 - reserved + * 11 - reserved + * + * ------- start of kernel segments: + * + * 12 - kernel code segment <==== new cacheline + * 13 - kernel data segment + * 14 - TSS + * 15 - LDT + * 16 - PNPBIOS support (16->32 gate) + * 17 - PNPBIOS support + * 18 - PNPBIOS support + * 19 - PNPBIOS support + * 20 - PNPBIOS support + * 21 - APM BIOS support + * 22 - APM BIOS support + * 23 - APM BIOS support + */ +#define GDT_ENTRY_TLS_ENTRIES 4 +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) -#define __USER_CS 0x23 -#define __USER_DS 0x2B +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +#define GDT_ENTRY_DEFAULT_USER_CS 4 +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) + +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) + +#define GDT_ENTRY_KERNEL_BASE 12 + +#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) + +#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS 
* 8) + +#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2) +#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3) + +#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4) +#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 9) + +/* + * The GDT has 21 entries but we pad it to cacheline boundary: + */ +#define GDT_ENTRIES 24 + +#define GDT_SIZE (GDT_ENTRIES * 8) + +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. + */ +#define IDT_ENTRIES 256 #endif --- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002 +++ linux/include/asm-i386/unistd.h Mon Aug 12 17:21:29 2002 @@ -248,6 +248,7 @@ #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.31-D7 2002-08-12 17:24 ` [patch] tls-2.5.31-D7 Ingo Molnar @ 2002-08-12 15:45 ` Christoph Hellwig 0 siblings, 0 replies; 47+ messages in thread From: Christoph Hellwig @ 2002-08-12 15:45 UTC (permalink / raw) To: Ingo Molnar Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri On Mon, Aug 12, 2002 at 07:24:25PM +0200, Ingo Molnar wrote: > the attached patch does this: > > - there are now 4 freely usable TLS entries, amongst them 0x40 for Wine > > - the 3 APM segments fit into the hole at the end of the kernel > descriptor area exactly => no GDT size increase. > > - the ->private_tls code is gone - unconditional inline copies are more > robust and faster as well. > > Plus the APM code needs Stephen's fix. I think this is the best approach > we had so far. Any objections? Patch looks good so far, but _please_ rename struct modify_ldt_ldt_s to something more sensible. (yes, I know it existed before, but with this patch the name is even more stupid than before) ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar 2002-08-07 18:33 ` Linus Torvalds @ 2002-08-07 19:02 ` Christoph Hellwig 2002-08-08 12:25 ` Jamie Lokier 2 siblings, 0 replies; 47+ messages in thread From: Christoph Hellwig @ 2002-08-07 19:02 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Alexandre Julliard On Wed, Aug 07, 2002 at 08:10:40PM +0200, Ingo Molnar wrote: > > the attached patch (against BK-curr + Luca Barbieri's two TLS patches) > does two things: > > - it implements a second TLS entry for Wine's purposes. The sys_set_thread_area interface gets worse with every patch you post.. Why do you really need a magic multiplexer syscall (you could have just used prctl if you don't need a sane interface..)? What about a proper interface like: asmlinkage int sys_set_thread_area(int entry, unsigned long base, int writeable) instead? ^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [patch] tls-2.5.30-A1 2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar 2002-08-07 18:33 ` Linus Torvalds 2002-08-07 19:02 ` [patch] tls-2.5.30-A1 Christoph Hellwig @ 2002-08-08 12:25 ` Jamie Lokier 2 siblings, 0 replies; 47+ messages in thread From: Jamie Lokier @ 2002-08-08 12:25 UTC (permalink / raw) To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Alexandre Julliard Ingo Molnar wrote: > the attached patch (against BK-curr + Luca Barbieri's two TLS patches) > does two things: > > - it implements a second TLS entry for Wine's purposes. Oh good; I was going to ask for this. Wine isn't the only program that wants to use its own thread-local storage mechanism and link with Glibc at the same time. The LDT works, but with limitations and overhead. thanks, -- Jamie ^ permalink raw reply [flat|nested] 47+ messages in thread
end of thread, other threads:[~2002-08-13 1:47 UTC | newest] Thread overview: 47+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar 2002-08-07 18:33 ` Linus Torvalds 2002-08-07 18:43 ` Stephen Rothwell 2002-08-07 18:57 ` Linus Torvalds 2002-08-07 19:40 ` Alexandre Julliard 2002-08-07 19:31 ` Ingo Molnar 2002-08-07 19:49 ` Alexandre Julliard 2002-08-07 22:01 ` Alan Cox 2002-08-07 22:36 ` Luca Barbieri 2002-08-07 22:54 ` Ingo Molnar 2002-08-07 23:21 ` Luca Barbieri 2002-08-07 23:35 ` DMA Problems with Intel 845 Chipset and Northwood CPU Mark Cuss 2002-08-08 0:58 ` John L. Korpi 2002-08-08 16:12 ` Mark Cuss 2002-08-11 21:46 ` [patch] tls-2.5.31-C3 Ingo Molnar 2002-08-12 7:34 ` Stephen Rothwell 2002-08-12 10:07 ` Ingo Molnar 2002-08-12 8:23 ` Stephen Rothwell 2002-08-12 10:08 ` Alan Cox 2002-08-12 10:49 ` Ingo Molnar 2002-08-12 10:34 ` Alan Cox 2002-08-12 12:17 ` Ingo Molnar 2002-08-12 11:47 ` Alan Cox 2002-08-12 12:55 ` Ingo Molnar 2002-08-12 12:29 ` Alan Cox 2002-08-12 10:35 ` Alan Cox 2002-08-12 13:10 ` Kasper Dupont 2002-08-12 15:20 ` Ingo Molnar 2002-08-12 14:46 ` Stephen Rothwell 2002-08-12 12:18 ` Luca Barbieri 2002-08-12 15:12 ` Ingo Molnar 2002-08-12 13:43 ` Luca Barbieri 2002-08-12 15:57 ` Ingo Molnar 2002-08-12 14:17 ` Luca Barbieri 2002-08-12 15:53 ` [patch] tls-2.5.31-D3 Ingo Molnar 2002-08-12 16:13 ` [patch] tls-2.5.31-D4 Ingo Molnar 2002-08-12 14:32 ` Luca Barbieri 2002-08-12 17:06 ` [patch] tls-2.5.31-D5 Ingo Molnar 2002-08-12 15:21 ` Jakub Jelinek 2002-08-12 17:41 ` Ingo Molnar 2002-08-12 15:54 ` Luca Barbieri 2002-08-12 18:03 ` [patch] tls-2.5.31-D9 Ingo Molnar 2002-08-13 1:50 ` [patch] tls-2.5.31-D5 Alexandre Julliard 2002-08-12 17:24 ` [patch] tls-2.5.31-D7 Ingo Molnar 2002-08-12 15:45 ` Christoph Hellwig 2002-08-07 19:02 ` [patch] tls-2.5.30-A1 Christoph Hellwig 2002-08-08 12:25 ` Jamie Lokier
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.