* [RFC, PATCH 11/24] i386 Vmi segment changes
@ 2006-03-13 18:07 Zachary Amsden
0 siblings, 0 replies; only message in thread
From: Zachary Amsden @ 2006-03-13 18:07 UTC (permalink / raw)
To: Linus Torvalds, Linux Kernel Mailing List,
Virtualization Mailing List, Xen-devel, Andrew Morton,
Zachary Amsden, Dan Hecht, Dan Arai, Anne Holler,
Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn,
Zachary Amsden
Make the Linux kernel able to run at CPL 0, 1, or 2. There are a few
limited places where CPL-0 is assumed, and they can be converted very
efficiently into a supervisor CPL check instead of a CPL-0 check.
This step prepares the kernel for running in direct execution under
a hypervisor.
Note the user_mode_vm macro used in ptrace.h is very similar to the
flag mixing of EFLAGS and CS used to test in one branch in entry.S.
To make the COMPARE_SEGMENT_REG macro work, it must contain both the
push and the pop, requiring an additional load of EAX after the
possible stack fixup. This is because FIXUP_ESPFIX_STACK destroys
%EAX again with a call to C code.
In all, the overhead is a couple of instructions, and no extra branches.
Note that I prefer to test selectors in the form:
SELECTOR_CLEAR_RPL(sel) == __KERNEL_SEL
instead of SELECTOR == (%seg), where %seg is the live register value.
This is because moves from segment registers are more costly than a
single ALU instruction, and both cost a temporary register. Still,
pushing the raw %cs value in the NMI after sysenter debug trap fixup
code instead of __KERNEL_CS | RPL is just as efficient, since fetching
RPL would require a %cs load anyway.
The switch from __KERNEL_DS to __USER_DS at one point is for
convenience, since they are both equivalent for %ds and %es segments;
__KERNEL_DS, contrary to the name, is only useful for the %ss segment.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Index: linux-2.6.16-rc5/include/asm-i386/ptrace.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/ptrace.h 2006-03-08 16:58:49.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/ptrace.h 2006-03-08 17:10:26.000000000 -0800
@@ -60,6 +60,7 @@ struct pt_regs {
#ifdef __KERNEL__
#include <asm/vm86.h>
+#include <asm/segment.h>
struct task_struct;
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
@@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_str
*/
static inline int user_mode(struct pt_regs *regs)
{
- return (regs->xcs & 3) != 0;
+ return (regs->xcs & SEGMENT_RPL_MASK) == 3;
}
static inline int user_mode_vm(struct pt_regs *regs)
{
- return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0;
+ return (((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= 3);
}
#define instruction_pointer(regs) ((regs)->eip)
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
Index: linux-2.6.16-rc5/include/asm-i386/segment.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/segment.h 2006-03-08 16:58:49.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/segment.h 2006-03-08 17:10:26.000000000 -0800
@@ -112,4 +112,9 @@
*/
#define IDT_ENTRIES 256
+#define SEGMENT_RPL_MASK 0x03
+#define SEGMENT_TI_MASK 0x04
+
+#include <mach_segment.h>
+
#endif
Index: linux-2.6.16-rc5/include/asm-i386/mach-vmi/mach_segment.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/mach-vmi/mach_segment.h 2006-03-08 17:10:26.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/mach-vmi/mach_segment.h 2006-03-08 17:10:26.000000000 -0800
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2005, VMware, Inc.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to zach@vmware.com
+ *
+ */
+
+
+#ifndef __MACH_SEGMENT_H
+#define __MACH_SEGMENT_H
+
+#if !defined(CONFIG_X86_VMI)
+# error invalid sub-arch include
+#endif
+
+#ifndef __ASSEMBLY__
+static inline unsigned get_kernel_rpl(void)
+{
+ unsigned cs;
+ __asm__ ("movl %%cs,%0" : "=r"(cs):);
+ return cs & SEGMENT_RPL_MASK;
+}
+#endif
+
+#define COMPARE_SEGMENT_STACK(segment, offset) \
+ pushl %eax; \
+ mov offset+4(%esp), %eax; \
+ andl $~SEGMENT_RPL_MASK, %eax; \
+ cmpw $segment,%ax; \
+ popl %eax;
+
+#define COMPARE_SEGMENT_REG(segment, reg) \
+ pushl %eax; \
+ mov reg, %eax; \
+ andl $~SEGMENT_RPL_MASK, %eax; \
+ cmpw $segment,%ax; \
+ popl %eax;
+
+#endif
Index: linux-2.6.16-rc5/include/asm-i386/mach-default/mach_segment.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/mach-default/mach_segment.h 2006-03-08 17:10:26.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/mach-default/mach_segment.h 2006-03-09 15:51:42.000000000 -0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2005, VMware, Inc.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to zach@vmware.com
+ *
+ */
+
+
+#ifndef __MACH_SEGMENT_H
+#define __MACH_SEGMENT_H
+
+#define get_kernel_rpl() 0
+
+#define COMPARE_SEGMENT_STACK(segment, offset) \
+ cmpw $segment, offset(%esp);
+
+#define COMPARE_SEGMENT_REG(segment, reg) \
+ pushl %eax; \
+ mov reg, %eax; \
+ cmpw $segment,%ax; \
+ popl %eax;
+#endif
Index: linux-2.6.16-rc5/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.16-rc5.orig/arch/i386/kernel/entry.S 2006-03-08 17:10:25.000000000 -0800
+++ linux-2.6.16-rc5/arch/i386/kernel/entry.S 2006-03-08 17:10:26.000000000 -0800
@@ -145,9 +145,11 @@ ret_from_exception:
ret_from_intr:
GET_THREAD_INFO(%ebp)
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
+ andl $VM_MASK, %eax
movb CS(%esp), %al
- testl $(VM_MASK | 3), %eax
- jz resume_kernel
+ andb $SEGMENT_RPL_MASK, %al
+ cmpl $SEGMENT_RPL_MASK, %eax
+ jb resume_kernel # not returning to vm86 or userspace
ENTRY(resume_userspace)
CLI # make sure we don't miss an interrupt
# setting need_resched or sigpending
@@ -382,17 +384,14 @@ syscall_badsys:
/* put ESP to the proper location */ \
movl %eax, %esp;
#define UNWIND_ESPFIX_STACK \
- pushl %eax; \
- movl %ss, %eax; \
- /* see if on 16bit stack */ \
- cmpw $__ESPFIX_SS, %ax; \
+ COMPARE_SEGMENT_REG(__ESPFIX_SS, %ss) \
jne 28f; \
- movl $__KERNEL_DS, %edx; \
+ movl $__USER_DS, %edx; \
movl %edx, %ds; \
movl %edx, %es; \
/* switch to 32bit stack */ \
FIXUP_ESPFIX_STACK \
-28: popl %eax;
+28:;
/*
* Build the entry stubs and pointer table with
@@ -451,6 +450,7 @@ error_code:
pushl %es
UNWIND_ESPFIX_STACK
popl %ecx
+ movl EAX(%esp), %eax
movl ES(%esp), %edi # get the function address
movl ORIG_EAX(%esp), %edx # get the error code
movl %eax, ORIG_EAX(%esp)
@@ -501,12 +501,12 @@ device_not_available_emulate:
* the instruction that would have done it for sysenter.
*/
#define FIX_STACK(offset, ok, label) \
- cmpw $__KERNEL_CS,4(%esp); \
+ COMPARE_SEGMENT_STACK(__KERNEL_CS, 4) \
jne ok; \
label: \
movl TSS_sysenter_esp0+offset(%esp),%esp; \
pushfl; \
- pushl $__KERNEL_CS; \
+ push %cs; \
pushl $sysenter_past_esp
KPROBE_ENTRY(debug)
@@ -530,10 +530,7 @@ debug_stack_correct:
* fault happened on the sysenter path.
*/
ENTRY(nmi)
- pushl %eax
- movl %ss, %eax
- cmpw $__ESPFIX_SS, %ax
- popl %eax
+ COMPARE_SEGMENT_REG(__ESPFIX_SS, %ss)
je nmi_16bit_stack
cmpl $sysenter_entry,(%esp)
je nmi_stack_fixup
@@ -560,7 +557,7 @@ nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
jmp nmi_stack_correct
nmi_debug_stack_check:
- cmpw $__KERNEL_CS,16(%esp)
+ COMPARE_SEGMENT_STACK(__KERNEL_CS, 16)
jne nmi_stack_correct
cmpl $debug,(%esp)
jb nmi_stack_correct
Index: linux-2.6.16-rc5/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.16-rc5.orig/arch/i386/kernel/process.c 2006-03-08 16:58:49.000000000 -0800
+++ linux-2.6.16-rc5/arch/i386/kernel/process.c 2006-03-09 15:52:17.000000000 -0800
@@ -348,7 +348,7 @@ int kernel_thread(int (*fn)(void *), voi
regs.xes = __USER_DS;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
- regs.xcs = __KERNEL_CS;
+ regs.xcs = __KERNEL_CS | get_kernel_rpl();
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
/* Ok, create the new process.. */
Index: linux-2.6.16-rc5/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.16-rc5.orig/arch/i386/kernel/traps.c 2006-03-08 17:10:25.000000000 -0800
+++ linux-2.6.16-rc5/arch/i386/kernel/traps.c 2006-03-08 17:10:26.000000000 -0800
@@ -970,10 +970,10 @@ fastcall void setup_x86_bogus_stack(unsi
memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
/* fill in the switch pointers */
switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
- switch16_ptr[1] = __ESPFIX_SS;
+ switch16_ptr[1] = __ESPFIX_SS | get_kernel_rpl();
switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
8 - CPU_16BIT_STACK_SIZE;
- switch32_ptr[1] = __KERNEL_DS;
+ switch32_ptr[1] = __KERNEL_DS | get_kernel_rpl();
}
fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2006-03-13 18:07 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-03-13 18:07 [RFC, PATCH 11/24] i386 Vmi segment changes Zachary Amsden
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox